Unverified Commit a9767290 authored by asynchronous rob's avatar asynchronous rob Committed by GitHub
Browse files

Initial erasure-coding of availability data (#56)

* erasure-coding block data

* adjust error handling

* merkleize chunks and yield branches for each

* construction and proving of merkle branches

* port over to new GF(2^16) impl

* some tests for wrapped_shard

* handle extra byte from GF(2^16) better

* point to github dependency

* add issue link

* point to master for reed-solomon-erasure

* add missing license header
parent 138a91d5
Pipeline #29116 failed with stages
in 12 minutes and 5 seconds
......@@ -2060,6 +2060,17 @@ dependencies = [
"tokio 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "polkadot-erasure-coding"
version = "0.1.0"
dependencies = [
"parity-codec 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"polkadot-primitives 0.1.0",
"reed-solomon-erasure 4.0.0 (git+https://github.com/paritytech/reed-solomon-erasure)",
"substrate-primitives 0.1.0 (git+https://github.com/paritytech/substrate)",
"substrate-trie 0.4.0 (git+https://github.com/paritytech/substrate)",
]
[[package]]
name = "polkadot-executor"
version = "0.1.0"
......@@ -2416,6 +2427,14 @@ dependencies = [
"redox_syscall 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "reed-solomon-erasure"
version = "4.0.0"
source = "git+https://github.com/paritytech/reed-solomon-erasure#63c609beaef0f8174a9a21f058d7d3e46c3a762c"
dependencies = [
"smallvec 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "1.0.6"
......@@ -4583,6 +4602,7 @@ dependencies = [
"checksum rayon-core 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b055d1e92aba6877574d8fe604a63c8b5df60f60e5982bf7ccbb1338ea527356"
"checksum redox_syscall 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "cf8fb82a4d1c9b28f1c26c574a5b541f5ffb4315f6c9a791fa47b6a04438fe93"
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
"checksum reed-solomon-erasure 4.0.0 (git+https://github.com/paritytech/reed-solomon-erasure)" = "<none>"
"checksum regex 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ee84f70c8c08744ea9641a731c7fadb475bf2ecc52d7f627feb833e0b3990467"
"checksum regex-syntax 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fbc557aac2b708fe84121caf261346cc2eed71978024337e42eb46b8a252ac6e"
"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5"
......
......@@ -23,6 +23,7 @@ members = [
"cli",
"collator",
"consensus",
"erasure-coding",
"executor",
"network",
"primitives",
......
[package]
name = "polkadot-erasure-coding"
version = "0.1.0"
authors = ["Parity Technologies <admin@parity.io>"]
edition = "2018"
[dependencies]
polkadot-primitives = { path = "../primitives" }
reed-solomon-erasure = { git = "https://github.com/paritytech/reed-solomon-erasure" }
parity-codec = "2.1"
substrate-primitives = { git = "https://github.com/paritytech/substrate" }
substrate-trie = { git = "https://github.com/paritytech/substrate" }
// Copyright 2018 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! As part of Polkadot's availability system, certain pieces of data
//! for each block are required to be kept available.
//!
//! The way we accomplish this is by erasure coding the data into n pieces
//! and constructing a merkle root of the data.
//!
//! Each of n validators stores their piece of data. We assume n=3f+k, k < 3.
//! f is the maximum number of faulty validators in the system.
//! The data is coded so any f+1 chunks can be used to reconstruct the full data.
extern crate polkadot_primitives as primitives;
extern crate reed_solomon_erasure as reed_solomon;
extern crate parity_codec as codec;
extern crate substrate_primitives;
extern crate substrate_trie as trie;
use codec::{Encode, Decode};
use reed_solomon::galois_16::{self, ReedSolomon};
use primitives::{Hash as H256, BlakeTwo256, HashT};
use primitives::parachain::{BlockData, Extrinsic};
use substrate_primitives::Blake2Hasher;
use trie::{MemoryDB, Trie, TrieMut, TrieDB, TrieDBMut};
use self::wrapped_shard::WrappedShard;
mod wrapped_shard;
// Upper bound on the number of validators accepted by `code_params`.
// we are limited to the field order of GF(2^16), which is 65536
const MAX_VALIDATORS: usize = <galois_16::Field as reed_solomon::Field>::ORDER;
/// Errors in erasure coding.
#[derive(Debug, Clone)]
pub enum Error {
/// Returned when there are too many validators, i.e. more than `MAX_VALIDATORS`.
TooManyValidators,
/// Cannot encode something for no validators (the validator count was zero).
EmptyValidators,
/// Cannot reconstruct: wrong number of validators.
/// Produced when the Reed-Solomon decoder reports invalid shard flags.
WrongValidatorCount,
/// Not enough chunks present for the decoder to recover the data.
NotEnoughChunks,
/// Too many chunks present.
TooManyChunks,
/// Chunks not of uniform length or the chunks are empty.
NonUniformChunks,
/// An uneven byte-length of a shard is not valid for GF(2^16) encoding.
UnevenLength,
/// Chunk index out of bounds.
/// The fields are the offending chunk index and the validator count, in that order.
ChunkIndexOutOfBounds(usize, usize),
/// Bad payload in reconstructed bytes, or an empty payload when encoding.
BadPayload,
/// Invalid branch proof: the trie could not be opened or traversed with the
/// supplied nodes, or the stored value did not decode as a hash.
InvalidBranchProof,
/// Branch out of bounds: the trie holds no entry for the requested index.
BranchOutOfBounds,
}
// Reed-Solomon coding parameters, derived from the validator count by `code_params`.
struct CodeParams {
// number of shards that carry the payload itself (`n_faulty + 1` in `code_params`).
data_shards: usize,
// number of additional parity shards (`n_good - 1` in `code_params`).
parity_shards: usize,
}
impl CodeParams {
    // Length in bytes of the data region of every shard for a payload of
    // `base_len` bytes.
    //
    // The result is an upper bound on `ceil(base_len / data_shards)`, so cutting
    // the payload into pieces of this size never produces more than
    // `data_shards` pieces.
    fn shard_len(&self, base_len: usize) -> usize {
        let quotient = base_len / self.data_shards;
        let remainder = base_len % self.data_shards;
        quotient + remainder
    }

    // Allocate `data_shards + parity_shards` zeroed shards and copy the payload
    // into the leading ones; shards the payload does not reach stay zeroed.
    //
    // Every shard is laid out as a 4-byte encoded `u32` length followed by the
    // data region. `payload` must be non-empty: an empty payload gives a shard
    // length of zero, and `chunks(0)` panics.
    fn make_shards_for(&self, payload: &[u8]) -> Vec<WrappedShard> {
        let data_len = self.shard_len(payload.len());
        let shard_count = self.data_shards + self.parity_shards;
        let template = WrappedShard::new(vec![0; data_len + 4]);
        let mut shards = vec![template; shard_count];

        for (piece, shard) in payload.chunks(data_len).zip(shards.iter_mut()) {
            let bytes: &mut [u8] = shard.as_mut();
            let (prefix, body) = bytes.split_at_mut(4);
            let copied = piece.len().min(body.len());

            // The length prefix records how many bytes of this shard are real
            // payload. RS encoding works on 16-bit words while the payload is a
            // byte slice, so the reader needs to be told where the data stops.
            //
            // TODO: could be done more efficiently by pushing extra bytes onto the
            // end. https://github.com/paritytech/polkadot/issues/88
            (copied as u32).using_encoded(|encoded| prefix.copy_from_slice(encoded));

            // The rest of the data region keeps its zero padding.
            body[..copied].copy_from_slice(&piece[..copied]);
        }

        shards
    }

    // Build a Reed-Solomon instance for these parameters.
    fn make_encoder(&self) -> ReedSolomon {
        let encoder = ReedSolomon::new(self.data_shards, self.parity_shards);
        encoder.expect("this struct is not created with invalid shard number; qed")
    }
}
fn code_params(n_validators: usize) -> Result<CodeParams, Error> {
if n_validators > MAX_VALIDATORS { return Err(Error::TooManyValidators) }
if n_validators == 0 { return Err(Error::EmptyValidators) }
let n_faulty = n_validators.saturating_sub(1) / 3;
let n_good = n_validators - n_faulty;
Ok(CodeParams {
data_shards: n_faulty + 1,
parity_shards: n_good - 1,
})
}
/// Obtain erasure-coded chunks, one for each validator.
///
/// Works only up to 256 validators, and `n_validators` must be non-zero.
pub fn obtain_chunks(n_validators: usize, block_data: &BlockData, extrinsic: &Extrinsic)
-> Result<Vec<Vec<u8>>, Error>
{
let params = code_params(n_validators)?;
let encoded = (block_data, extrinsic).encode();
if encoded.is_empty() {
return Err(Error::BadPayload);
}
let mut shards = params.make_shards_for(&encoded[..]);
params.make_encoder().encode(&mut shards[..])
.expect("Payload non-empty, shard sizes are uniform, and validator numbers checked; qed");
Ok(shards.into_iter().map(|w| w.into_inner()).collect())
}
/// Reconstruct the block data from a set of chunks.
///
/// Provide an iterator containing chunk data and the corresponding index.
/// The indices of the present chunks must be indicated. If too few chunks
/// are provided, recovery is not possible. At most the first `n_validators`
/// items of the iterator are consumed; a later item with an already-seen
/// index replaces the earlier one.
///
/// Works only up to `MAX_VALIDATORS` (65536) validators, and `n_validators`
/// must be non-zero.
///
/// # Errors
///
/// * `Error::TooManyValidators` / `Error::EmptyValidators` for an unsupported
///   validator count.
/// * `Error::ChunkIndexOutOfBounds` if a chunk index is `>= n_validators`.
/// * `Error::UnevenLength` if the chunk length is odd.
/// * `Error::NonUniformChunks` if chunks differ in length or are empty.
/// * `Error::NotEnoughChunks`, `Error::WrongValidatorCount` or
///   `Error::TooManyChunks` as reported by the Reed-Solomon decoder.
/// * `Error::BadPayload` if the recovered bytes do not decode.
///
/// # Panics
///
/// Panics if the Reed-Solomon decoder reports an error that the checks in
/// this function are meant to rule out.
pub fn reconstruct<'a, I: 'a>(n_validators: usize, chunks: I)
-> Result<(BlockData, Extrinsic), Error>
where I: IntoIterator<Item=(&'a [u8], usize)>
{
let params = code_params(n_validators)?;
// one slot per validator; `None` marks a chunk that was not supplied.
let mut shards: Vec<Option<WrappedShard>> = vec![None; n_validators];
// length of the first chunk seen; every other chunk must match it.
let mut shard_len = None;
for (chunk_data, chunk_idx) in chunks.into_iter().take(n_validators) {
if chunk_idx >= n_validators {
return Err(Error::ChunkIndexOutOfBounds(chunk_idx, n_validators));
}
let shard_len = shard_len.get_or_insert_with(|| chunk_data.len());
// GF(2^16) works on two-byte words, so an odd length cannot be a valid shard.
if *shard_len % 2 != 0 {
return Err(Error::UnevenLength);
}
if *shard_len != chunk_data.len() || *shard_len == 0 {
return Err(Error::NonUniformChunks);
}
shards[chunk_idx] = Some(WrappedShard::new(chunk_data.to_vec()));
}
// recover the missing shards in place, translating decoder errors.
if let Err(e) = params.make_encoder().reconstruct(&mut shards[..]) {
match e {
reed_solomon::Error::TooFewShardsPresent => Err(Error::NotEnoughChunks)?,
reed_solomon::Error::InvalidShardFlags => Err(Error::WrongValidatorCount)?,
reed_solomon::Error::TooManyShards => Err(Error::TooManyChunks)?,
reed_solomon::Error::EmptyShard => panic!("chunks are all non-empty; this is checked above; qed"),
reed_solomon::Error::IncorrectShardSize => panic!("chunks are all same len; this is checked above; qed"),
_ => panic!("reed_solomon encoder returns no more variants for this function; qed"),
}
}
// lazily decode from the data shards.
// Only the first `data_shards` shards carry payload. Each starts with an
// encoded `u32` length followed by that many payload bytes.
// NOTE(review): a shard whose length prefix fails to decode, or claims more
// bytes than the shard holds, is silently skipped by `filter_map` rather
// than reported as an error.
Decode::decode(&mut ShardInput {
shards: shards.iter()
.map(|x| x.as_ref())
.take(params.data_shards)
.map(|x| x.expect("all data shards have been recovered; qed"))
.filter_map(|x| {
let mut s: &[u8] = x.as_ref();
let data_len = u32::decode(&mut s)? as usize;
// NOTE: s has been mutated to point forward by `decode`.
if s.len() < data_len {
None
} else {
Some(&s[..data_len])
}
}),
cur_shard: None,
}).ok_or_else(|| Error::BadPayload)
}
/// An iterator that yields merkle branches and chunk data for all chunks to
/// be sent to other validators.
///
/// Each item is a pair of the recorded trie nodes for one chunk index and the
/// chunk itself; items are produced in increasing index order starting at 0.
pub struct Branches<'a> {
// in-memory storage holding the nodes of the trie.
trie_storage: MemoryDB<Blake2Hasher>,
// root hash of the trie.
root: H256,
// the chunks; a chunk's position is the index it is keyed by in the trie.
chunks: Vec<&'a [u8]>,
// index of the next chunk to yield.
current_pos: usize,
}
impl<'a> Branches<'a> {
    /// Returns a copy of the trie root hash.
    pub fn root(&self) -> H256 {
        let root = &self.root;
        root.clone()
    }
}
impl<'a> Iterator for Branches<'a> {
    type Item = (Vec<Vec<u8>>, &'a [u8]);

    /// Yields the recorded trie nodes and the chunk for the current position,
    /// then advances. Returns `None` once the trie has no entry for the
    /// current position.
    fn next(&mut self) -> Option<Self::Item> {
        use trie::Recorder;

        let trie = TrieDB::new(&self.trie_storage, &self.root)
            .expect("`Branches` is only created with a valid memorydb that contains all nodes for the trie with given root; qed");

        // Look the current index up while recording every node visited on the way.
        let mut recorder = Recorder::new();
        let lookup = (self.current_pos as u32).using_encoded(|key|
            trie.get_with(key, &mut recorder)
        );

        if lookup.expect("all nodes in trie present; qed").is_none() {
            return None;
        }

        let nodes = recorder.drain().into_iter().map(|record| record.data).collect();
        let chunk: &'a [u8] = *self.chunks.get(self.current_pos)
            .expect("there is a one-to-one mapping of chunks to valid merkle branches; qed");

        self.current_pos += 1;
        Some((nodes, chunk))
    }
}
/// Construct a trie from chunks of an erasure-coded value. This returns an
/// iterator of merkle proofs, one for each chunk, which also exposes the trie
/// root hash through `Branches::root`.
///
/// The trie maps each chunk's encoded `u32` index to the hash of that chunk.
pub fn branches<'a>(chunks: Vec<&'a [u8]>) -> Branches<'a> {
    let mut trie_storage: MemoryDB<Blake2Hasher> = MemoryDB::default();
    let mut root = H256::default();

    // The inner scope ends the trie's mutable borrows of `trie_storage` and
    // `root` before both are moved into the returned value.
    {
        let mut trie = TrieDBMut::new(&mut trie_storage, &mut root);
        for (position, &chunk) in chunks.iter().enumerate() {
            let chunk_hash = BlakeTwo256::hash(chunk);
            (position as u32).using_encoded(|key| {
                trie.insert(key, chunk_hash.as_ref())
                    .expect("a fresh trie stored in memory cannot have errors loading nodes; qed");
            });
        }
    }

    Branches { trie_storage, root, chunks, current_pos: 0 }
}
/// Verify a markle branch, yielding the chunk hash meant to be present at that
/// index.
pub fn branch_hash(root: &H256, branch_nodes: &[Vec<u8>], index: usize) -> Result<H256, Error> {
let mut trie_storage: MemoryDB<Blake2Hasher> = MemoryDB::default();
for node in branch_nodes.iter() {
(&mut trie_storage as &mut trie::HashDB<_>).insert(node.as_slice());
}
let trie = TrieDB::new(&trie_storage, &root).map_err(|_| Error::InvalidBranchProof)?;
let res = (index as u32).using_encoded(|key|
trie.get_with(key, |raw_hash: &[u8]| H256::decode(&mut &raw_hash[..]))
);
match res {
Ok(Some(Some(hash))) => Ok(hash),
Ok(Some(None)) => Err(Error::InvalidBranchProof), // hash failed to decode
Ok(None) => Err(Error::BranchOutOfBounds),
Err(_) => Err(Error::InvalidBranchProof),
}
}
// input for `parity_codec` which draws data from the data shards
struct ShardInput<'a, I> {
// shards that have not been started yet, in reading order.
shards: I,
// the shard currently being read, with the offset of its next unread byte.
cur_shard: Option<(&'a [u8], usize)>,
}
impl<'a, I: Iterator<Item=&'a [u8]>> codec::Input for ShardInput<'a, I> {
    // Fill `into` with bytes taken from the shards in order, moving on to the
    // next shard whenever the current one runs out. Returns the number of bytes
    // written, which is less than `into.len()` only when the shards run dry.
    fn read(&mut self, into: &mut [u8]) -> usize {
        let mut read_bytes = 0;

        while read_bytes < into.len() {
            // Resume the shard in progress, or start the next one.
            let current = self.cur_shard.take().or_else(|| self.shards.next().map(|s| (s, 0)));
            let (shard, offset) = match current {
                Some(pair) => pair,
                None => break,
            };

            // Exhausted shard: `cur_shard` is already cleared, so the next
            // iteration pulls a fresh shard.
            if offset >= shard.len() {
                continue;
            }

            let wanted = into.len() - read_bytes;
            let available = shard.len() - offset;
            let count = ::std::cmp::min(wanted, available);

            into[read_bytes..read_bytes + count]
                .copy_from_slice(&shard[offset..offset + count]);

            read_bytes += count;
            self.cur_shard = Some((shard, offset + count));
        }

        read_bytes
    }
}
#[cfg(test)]
mod tests {
use super::*;
// The validator limit must equal the order of GF(2^16).
#[test]
fn field_order_is_right_size() {
assert_eq!(MAX_VALIDATORS, 65536);
}
// Encoding for 10 validators and decoding from a subset of chunks must
// return the original block data and extrinsic.
#[test]
fn round_trip_block_data() {
let block_data = BlockData((0..255).collect());
let ex = Extrinsic { outgoing_messages: Vec::new() };
let chunks = obtain_chunks(
10,
&block_data,
&ex,
).unwrap();
assert_eq!(chunks.len(), 10);
// any 4 chunks should work.
// (10 validators give (10 - 1) / 3 = 3 faulty, hence 3 + 1 = 4 data shards.)
let reconstructed = reconstruct(
10,
[
(&*chunks[1], 1),
(&*chunks[4], 4),
(&*chunks[6], 6),
(&*chunks[9], 9),
].iter().cloned(),
).unwrap();
assert_eq!(reconstructed, (block_data, ex));
}
// Every proof yielded by `branches` must verify against the root and
// resolve to the hash of the chunk at the same index.
#[test]
fn construct_valid_branches() {
let block_data = BlockData(vec![2; 256]);
let chunks = obtain_chunks(
10,
&block_data,
&Extrinsic { outgoing_messages: Vec::new() },
).unwrap();
let chunks: Vec<_> = chunks.iter().map(|c| &c[..]).collect();
assert_eq!(chunks.len(), 10);
let branches = branches(chunks.clone());
let root = branches.root();
let proofs: Vec<_> = branches.map(|(proof, _)| proof).collect();
assert_eq!(proofs.len(), 10);
for (i, proof) in proofs.into_iter().enumerate() {
assert_eq!(branch_hash(&root, &proof, i).unwrap(), BlakeTwo256::hash(chunks[i]));
}
}
}
// Copyright 2019 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Provides a safe wrapper that gives views into a byte-vec.
/// Owns a byte buffer of even length and exposes it both as `[u8]` and as
/// `[[u8; 2]]`.
#[derive(Clone)]
pub(crate) struct WrappedShard {
    inner: Vec<u8>,
}

impl WrappedShard {
    /// Wrap `data`, appending one zero byte when its length is odd so that the
    /// buffer always splits evenly into two-byte words.
    pub(crate) fn new(mut data: Vec<u8>) -> Self {
        let is_odd = data.len() % 2 != 0;
        if is_odd {
            data.push(0);
        }
        WrappedShard { inner: data }
    }

    /// Consume the wrapper and return the (possibly padded) bytes.
    pub(crate) fn into_inner(self) -> Vec<u8> {
        let WrappedShard { inner } = self;
        inner
    }
}

impl AsRef<[u8]> for WrappedShard {
    fn as_ref(&self) -> &[u8] {
        &self.inner[..]
    }
}

impl AsMut<[u8]> for WrappedShard {
    fn as_mut(&mut self) -> &mut [u8] {
        &mut self.inner[..]
    }
}

impl AsRef<[[u8; 2]]> for WrappedShard {
    fn as_ref(&self) -> &[[u8; 2]] {
        assert_eq!(self.inner.len() % 2, 0);
        let words = self.inner.len() / 2;
        let start = self.inner.as_ptr() as *const [u8; 2];
        // SAFETY: `[u8; 2]` has alignment 1 and size 2, and the buffer holds
        // exactly `2 * words` initialized bytes (even length asserted above).
        // `Vec::as_ptr` is non-null and aligned even for an empty vector, so
        // the zero-length case is valid too. The returned borrow is tied to
        // `&self`, so the buffer cannot be freed or mutated while it lives.
        unsafe { ::std::slice::from_raw_parts(start, words) }
    }
}

impl AsMut<[[u8; 2]]> for WrappedShard {
    fn as_mut(&mut self) -> &mut [[u8; 2]] {
        let len = self.inner.len();
        assert_eq!(len % 2, 0);
        let start = self.inner.as_mut_ptr() as *mut [u8; 2];
        // SAFETY: same layout argument as the shared view: alignment 1 and
        // exactly `len` initialized bytes viewed as `len / 2` two-byte words.
        // The returned borrow is tied to `&mut self`, so it is the only live
        // access to the buffer.
        unsafe { ::std::slice::from_raw_parts_mut(start, len / 2) }
    }
}

impl ::std::iter::FromIterator<[u8; 2]> for WrappedShard {
    fn from_iter<I: IntoIterator<Item=[u8; 2]>>(iterable: I) -> Self {
        let words = iterable.into_iter();
        // Reserve for the iterator's lower size bound; the vector grows beyond
        // that if more words arrive.
        let (lower_bound, _) = words.size_hint();
        let mut inner = Vec::with_capacity(lower_bound * 2);
        for word in words {
            inner.extend_from_slice(&word);
        }
        debug_assert_eq!(inner.len() % 2, 0);
        WrappedShard { inner }
    }
}
#[cfg(test)]
mod tests {
use super::WrappedShard;
// All four views of an empty shard must be obtainable without panicking.
#[test]
fn wrap_empty_ok() {
let mut wrapped = WrappedShard::new(Vec::new());
{
let _: &mut [u8] = wrapped.as_mut();
let _: &mut [[u8; 2]] = wrapped.as_mut();
}
{
let _: &[u8] = wrapped.as_ref();
let _: &[[u8; 2]] = wrapped.as_ref();
}
}
// An odd-length input is padded with one zero byte, and a write through the
// two-byte view is visible through the byte view at the same position.
#[test]
fn data_order_preserved() {
let mut wrapped = WrappedShard::new(vec![1, 2, 3]);
{
let x: &[u8] = wrapped.as_ref();
assert_eq!(x, &[1, 2, 3, 0]);
}
{
let x: &mut [[u8; 2]] = wrapped.as_mut();
assert_eq!(x, &mut [[1, 2], [3, 0]]);
x[1] = [3, 4];
}
{
let x: &[u8] = wrapped.as_ref();
assert_eq!(x, &[1, 2, 3, 4]);
}
}
// Collecting two-byte words flattens them into the byte buffer in order.
#[test]
fn from_iter() {
let w: WrappedShard = vec![[1, 2], [3, 4], [5, 6]].into_iter().collect();
let x: &[u8] = w.as_ref();
assert_eq!(x, &[1, 2, 3, 4, 5, 6])
}
}
......@@ -44,7 +44,9 @@ extern crate serde;