| { | ||
| "git": { | ||
| "sha1": "42b1c394e516ad4b12edd3090102f841d95dfda7" | ||
| "sha1": "9b756ec8097afc782d76f7aec0a5ac9f4b82329a" | ||
| }, | ||
| "path_in_vcs": "" | ||
| } |
+16
-2
@@ -15,3 +15,3 @@ # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO | ||
| name = "abao" | ||
| version = "0.1.2" | ||
| version = "0.2.0" | ||
| authors = [ | ||
@@ -78,3 +78,17 @@ "Jack O'Connor", | ||
| [features] | ||
| default = ["tokio_io"] | ||
| default = [ | ||
| "tokio_io", | ||
| "group_size_16k", | ||
| ] | ||
| group_size_1024k = [] | ||
| group_size_128k = [] | ||
| group_size_16k = [] | ||
| group_size_1k = [] | ||
| group_size_256k = [] | ||
| group_size_2k = [] | ||
| group_size_32k = [] | ||
| group_size_4k = [] | ||
| group_size_512k = [] | ||
| group_size_64k = [] | ||
| group_size_8k = [] | ||
| tokio_io = [ | ||
@@ -81,0 +95,0 @@ "tokio", |
+10
-0
| This is a fork of [https://github.com/oconnor663/bao] that adds async support and chunk groups. | ||
| # Chunk groups | ||
| As an extension of the original bao crate, this crate implements chunk groups. | ||
| Chunk groups can be selected using a (mutually exclusive) feature flag. | ||
| Supported chunk groups are from 1kb (1 chunk per group, compatible with the | ||
| orininal bao crate) to 1024kb (2^10 chunks per group). | ||
| The default is 16kb, so 2^4 chunks per group. | ||
| # <a href="#"><img src="docs/bao.svg" alt="Bao" height=100></a> [](https://github.com/n0-computer/abao/actions) [](https://docs.rs/abao) [](https://crates.io/crates/abao) | ||
@@ -4,0 +14,0 @@ |
+40
-38
@@ -40,3 +40,5 @@ //! Encode some input bytes into the Bao format, or slice an existing encoding. | ||
| use crate::Finalization::{self, NotRoot, Root}; | ||
| use crate::{Hash, ParentNode, CHUNK_SIZE, HASH_SIZE, HEADER_SIZE, MAX_DEPTH, PARENT_SIZE}; | ||
| use crate::{ | ||
| ChunkGroupState, Hash, ParentNode, GROUP_SIZE, HASH_SIZE, HEADER_SIZE, MAX_DEPTH, PARENT_SIZE, | ||
| }; | ||
| use arrayref::array_mut_ref; | ||
@@ -101,4 +103,4 @@ use arrayvec::ArrayVec; | ||
| // want to overflow when content_len is u64::MAX_VALUE. | ||
| let full_chunks: u64 = content_len / CHUNK_SIZE as u64; | ||
| let has_partial_chunk: bool = (content_len % CHUNK_SIZE as u64) != 0; | ||
| let full_chunks: u64 = content_len / GROUP_SIZE as u64; | ||
| let has_partial_chunk: bool = (content_len % GROUP_SIZE as u64) != 0; | ||
| cmp::max(1, full_chunks + has_partial_chunk as u64) | ||
@@ -108,4 +110,4 @@ } | ||
| pub(crate) fn chunk_size(chunk_index: u64, content_len: u64) -> usize { | ||
| let chunk_start = chunk_index * CHUNK_SIZE as u64; | ||
| cmp::min(CHUNK_SIZE, (content_len - chunk_start) as usize) | ||
| let chunk_start = chunk_index * GROUP_SIZE as u64; | ||
| cmp::min(GROUP_SIZE, (content_len - chunk_start) as usize) | ||
| } | ||
@@ -317,3 +319,3 @@ | ||
| fn needs_merge(&self) -> bool { | ||
| let chunks = self.total_len / CHUNK_SIZE as u64; | ||
| let chunks = self.total_len / GROUP_SIZE as u64; | ||
| self.subtrees.len() > chunks.count_ones() as usize | ||
@@ -416,3 +418,3 @@ } | ||
| inner: T, | ||
| chunk_state: blake3::guts::ChunkState, | ||
| chunk_state: ChunkGroupState, | ||
| tree_state: State, | ||
@@ -430,3 +432,3 @@ outboard: bool, | ||
| inner, | ||
| chunk_state: blake3::guts::ChunkState::new(0), | ||
| chunk_state: ChunkGroupState::new(0), | ||
| tree_state: State::new(), | ||
@@ -530,3 +532,3 @@ outboard: false, | ||
| if !self.outboard { | ||
| let mut chunk = [0; CHUNK_SIZE]; | ||
| let mut chunk = [0; GROUP_SIZE]; | ||
| self.inner | ||
@@ -565,8 +567,8 @@ .seek(SeekFrom::Start(read_cursor - size as u64))?; | ||
| // the tree state, and write out any completed parent nodes. | ||
| if self.chunk_state.len() == CHUNK_SIZE { | ||
| if self.chunk_state.len() == GROUP_SIZE { | ||
| // This can't be the root, because we know more input is coming. | ||
| let chunk_hash = self.chunk_state.finalize(false); | ||
| self.tree_state.push_subtree(&chunk_hash, CHUNK_SIZE); | ||
| let chunk_counter = self.tree_state.count() / CHUNK_SIZE as u64; | ||
| self.chunk_state = blake3::guts::ChunkState::new(chunk_counter); | ||
| self.tree_state.push_subtree(&chunk_hash, GROUP_SIZE); | ||
| let chunk_counter = self.tree_state.count() / GROUP_SIZE as u64; | ||
| self.chunk_state = ChunkGroupState::new(chunk_counter); | ||
| while let Some(parent) = self.tree_state.merge_parent() { | ||
@@ -578,3 +580,3 @@ self.inner.write_all(&parent)?; | ||
| // Add as many bytes as possible to the current chunk. | ||
| let want = CHUNK_SIZE - self.chunk_state.len(); | ||
| let want = GROUP_SIZE - self.chunk_state.len(); | ||
| let take = cmp::min(want, input.len()); | ||
@@ -632,3 +634,3 @@ if !self.outboard { | ||
| fn at_root(&self) -> bool { | ||
| self.content_position < CHUNK_SIZE as u64 && self.stack_depth == 1 | ||
| self.content_position < GROUP_SIZE as u64 && self.stack_depth == 1 | ||
| } | ||
@@ -660,3 +662,3 @@ | ||
| debug_assert!(!self.at_eof(), "not valid at EOF"); | ||
| self.content_position - (self.content_position % CHUNK_SIZE as u64) | ||
| self.content_position - (self.content_position % GROUP_SIZE as u64) | ||
| } | ||
@@ -666,3 +668,3 @@ | ||
| debug_assert!(!self.at_eof(), "not valid at EOF"); | ||
| self.content_position / CHUNK_SIZE as u64 | ||
| self.content_position / GROUP_SIZE as u64 | ||
| } | ||
@@ -713,4 +715,4 @@ | ||
| finalization: self.finalization(), | ||
| skip: (self.content_position % CHUNK_SIZE as u64) as usize, | ||
| index: self.content_position / CHUNK_SIZE as u64, | ||
| skip: (self.content_position % GROUP_SIZE as u64) as usize, | ||
| index: self.content_position / GROUP_SIZE as u64, | ||
| } | ||
@@ -803,3 +805,3 @@ } | ||
| let distance = seek_to - self.next_chunk_start(); | ||
| if distance < CHUNK_SIZE as u64 { | ||
| if distance < GROUP_SIZE as u64 { | ||
| if verifying_final_chunk { | ||
@@ -811,3 +813,3 @@ let size = (content_len - self.next_chunk_start()) as usize; | ||
| skip: size, // Skip the whole thing. | ||
| index: self.content_position / CHUNK_SIZE as u64, | ||
| index: self.content_position / GROUP_SIZE as u64, | ||
| }; | ||
@@ -826,3 +828,3 @@ } else { | ||
| .unwrap_or(0); | ||
| if downshifted_distance < CHUNK_SIZE as u64 { | ||
| if downshifted_distance < GROUP_SIZE as u64 { | ||
| debug_assert!(self.upcoming_parents > 0); | ||
@@ -836,3 +838,3 @@ return NextRead::Parent; | ||
| // this case. | ||
| let subtree_size = (CHUNK_SIZE as u64) << self.upcoming_parents; | ||
| let subtree_size = (GROUP_SIZE as u64) << self.upcoming_parents; | ||
| self.content_position = self.next_chunk_start() + subtree_size; | ||
@@ -895,3 +897,3 @@ self.encoding_position += encoded_subtree_size(subtree_size); | ||
| let size = chunk_size(self.next_chunk_index(), content_len); | ||
| let skip = self.content_position % CHUNK_SIZE as u64; | ||
| let skip = self.content_position % GROUP_SIZE as u64; | ||
| self.content_position += size as u64 - skip; | ||
@@ -1031,3 +1033,3 @@ self.encoding_position += size as u128; | ||
| /// // The slice includes some overhead to store the necessary subtree hashes. | ||
| /// assert_eq!(9096, slice.len()); | ||
| /// assert_eq!(16776, slice.len()); | ||
| /// # Ok(()) | ||
@@ -1044,3 +1046,3 @@ /// # } | ||
| parser: ParseState, | ||
| buf: [u8; CHUNK_SIZE], | ||
| buf: [u8; GROUP_SIZE], | ||
| buf_start: usize, | ||
@@ -1085,3 +1087,3 @@ buf_end: usize, | ||
| parser: ParseState::new(), | ||
| buf: [0; CHUNK_SIZE], | ||
| buf: [0; GROUP_SIZE], | ||
| buf_start: 0, | ||
@@ -1224,3 +1226,3 @@ buf_end: 0, | ||
| use super::*; | ||
| use crate::decode::make_test_input; | ||
| use crate::{decode::make_test_input, GROUP_LOG}; | ||
@@ -1296,3 +1298,3 @@ #[test] | ||
| for total_chunks in 1..100 { | ||
| let content_len = total_chunks * CHUNK_SIZE as u64; | ||
| let content_len = total_chunks * GROUP_SIZE as u64; | ||
| let pre_post_list = make_pre_post_list(total_chunks); | ||
@@ -1320,12 +1322,12 @@ for chunk in 0..total_chunks { | ||
| fn drive_state(mut input: &[u8]) -> Hash { | ||
| let last_chunk_is_root = input.len() <= CHUNK_SIZE; | ||
| let last_chunk_is_root = input.len() <= GROUP_SIZE; | ||
| let mut state = State::new(); | ||
| let mut chunk_index = 0; | ||
| while input.len() > CHUNK_SIZE { | ||
| let hash = blake3::guts::ChunkState::new(chunk_index) | ||
| .update(&input[..CHUNK_SIZE]) | ||
| while input.len() > GROUP_SIZE { | ||
| let hash = ChunkGroupState::new(chunk_index) | ||
| .update(&input[..GROUP_SIZE]) | ||
| .finalize(false); | ||
| chunk_index += 1; | ||
| state.push_subtree(&hash, CHUNK_SIZE); | ||
| input = &input[CHUNK_SIZE..]; | ||
| state.push_subtree(&hash, GROUP_SIZE); | ||
| input = &input[GROUP_SIZE..]; | ||
| // Merge any parents, but throw away the result. We don't need | ||
@@ -1335,3 +1337,3 @@ // them, but we need to avoid tripping an assert. | ||
| } | ||
| let hash = blake3::guts::ChunkState::new(chunk_index) | ||
| let hash = ChunkGroupState::new(chunk_index) | ||
| .update(input) | ||
@@ -1354,3 +1356,3 @@ .finalize(last_chunk_is_root); | ||
| fn test_state() { | ||
| let buf = [0x42; 65537]; | ||
| let buf = vec![0x42; 65537 << GROUP_LOG]; | ||
| for &case in crate::test::TEST_CASES { | ||
@@ -1404,3 +1406,3 @@ dbg!(case); | ||
| fn test_empty_write_after_one_chunk() { | ||
| let input = &[0; CHUNK_SIZE]; | ||
| let input = &[0; GROUP_SIZE]; | ||
| let mut output = Vec::new(); | ||
@@ -1407,0 +1409,0 @@ let mut encoder = Encoder::new(io::Cursor::new(&mut output)); |
+120
-20
@@ -53,6 +53,33 @@ //! [Repo](https://github.com/oconnor663/bao) — | ||
| pub const HASH_SIZE: usize = 32; | ||
| /// log2(GROUP_CHUNKS) | ||
| #[cfg(feature = "group_size_1k")] | ||
| pub(crate) const GROUP_LOG: usize = 0; | ||
| #[cfg(feature = "group_size_2k")] | ||
| pub(crate) const GROUP_LOG: usize = 1; | ||
| #[cfg(feature = "group_size_4k")] | ||
| pub(crate) const GROUP_LOG: usize = 2; | ||
| #[cfg(feature = "group_size_8k")] | ||
| pub(crate) const GROUP_LOG: usize = 3; | ||
| #[cfg(feature = "group_size_16k")] | ||
| pub(crate) const GROUP_LOG: usize = 4; | ||
| #[cfg(feature = "group_size_32k")] | ||
| pub(crate) const GROUP_LOG: usize = 5; | ||
| #[cfg(feature = "group_size_64k")] | ||
| pub(crate) const GROUP_LOG: usize = 6; | ||
| #[cfg(feature = "group_size_128k")] | ||
| pub(crate) const GROUP_LOG: usize = 7; | ||
| #[cfg(feature = "group_size_256k")] | ||
| pub(crate) const GROUP_LOG: usize = 8; | ||
| #[cfg(feature = "group_size_512k")] | ||
| pub(crate) const GROUP_LOG: usize = 9; | ||
| #[cfg(feature = "group_size_1024k")] | ||
| pub(crate) const GROUP_LOG: usize = 10; | ||
| /// The number of chunks in a chunk groups. Must be a power of 2. | ||
| pub(crate) const GROUP_CHUNKS: usize = 1 << GROUP_LOG; | ||
| /// The size of a chunk group in bytes. | ||
| pub(crate) const GROUP_SIZE: usize = GROUP_CHUNKS * CHUNK_SIZE; | ||
| pub(crate) const PARENT_SIZE: usize = 2 * HASH_SIZE; | ||
| pub(crate) const HEADER_SIZE: usize = 8; | ||
| pub(crate) const CHUNK_SIZE: usize = 1024; | ||
| pub(crate) const MAX_DEPTH: usize = 54; // 2^54 * CHUNK_SIZE = 2^64 | ||
| const CHUNK_SIZE: usize = 1024; | ||
| pub(crate) const MAX_DEPTH: usize = 64 - (GROUP_LOG + 10); // chunk size is 2^10 | ||
@@ -104,21 +131,94 @@ /// An array of `HASH_SIZE` bytes. This will be a wrapper type in a future version. | ||
| 10, | ||
| CHUNK_SIZE - 1, | ||
| CHUNK_SIZE, | ||
| CHUNK_SIZE + 1, | ||
| 2 * CHUNK_SIZE - 1, | ||
| 2 * CHUNK_SIZE, | ||
| 2 * CHUNK_SIZE + 1, | ||
| 3 * CHUNK_SIZE - 1, | ||
| 3 * CHUNK_SIZE, | ||
| 3 * CHUNK_SIZE + 1, | ||
| 4 * CHUNK_SIZE - 1, | ||
| 4 * CHUNK_SIZE, | ||
| 4 * CHUNK_SIZE + 1, | ||
| 8 * CHUNK_SIZE - 1, | ||
| 8 * CHUNK_SIZE, | ||
| 8 * CHUNK_SIZE + 1, | ||
| 16 * CHUNK_SIZE - 1, | ||
| 16 * CHUNK_SIZE, | ||
| 16 * CHUNK_SIZE + 1, | ||
| GROUP_SIZE - 1, | ||
| GROUP_SIZE, | ||
| GROUP_SIZE + 1, | ||
| 2 * GROUP_SIZE - 1, | ||
| 2 * GROUP_SIZE, | ||
| 2 * GROUP_SIZE + 1, | ||
| 3 * GROUP_SIZE - 1, | ||
| 3 * GROUP_SIZE, | ||
| 3 * GROUP_SIZE + 1, | ||
| 4 * GROUP_SIZE - 1, | ||
| 4 * GROUP_SIZE, | ||
| 4 * GROUP_SIZE + 1, | ||
| 8 * GROUP_SIZE - 1, | ||
| 8 * GROUP_SIZE, | ||
| 8 * GROUP_SIZE + 1, | ||
| 16 * GROUP_SIZE - 1, | ||
| 16 * GROUP_SIZE, | ||
| 16 * GROUP_SIZE + 1, | ||
| ]; | ||
| } | ||
| /// A state machine for hashing a chunk group, with the same API as | ||
| /// `blake3::guts::ChunkState`. This really should not exist but instead | ||
| /// call into blake3, but the required methods are not public. | ||
| /// | ||
| /// See https://github.com/BLAKE3-team/BLAKE3/tree/more_guts | ||
| #[derive(Clone, Debug)] | ||
| pub(crate) struct ChunkGroupState { | ||
| current_chunk: u64, | ||
| leaf_hashes: [Hash; GROUP_CHUNKS - 1], | ||
| current: blake3::guts::ChunkState, | ||
| } | ||
| impl ChunkGroupState { | ||
| pub fn new(group_counter: u64) -> Self { | ||
| let chunk_counter = group_counter << GROUP_LOG; | ||
| Self { | ||
| current_chunk: chunk_counter, | ||
| current: blake3::guts::ChunkState::new(chunk_counter), | ||
| leaf_hashes: [Hash::from([0; HASH_SIZE]); GROUP_CHUNKS - 1], | ||
| } | ||
| } | ||
| pub fn len(&self) -> usize { | ||
| self.hashes() * CHUNK_SIZE + self.current.len() | ||
| } | ||
| pub fn update(&mut self, input: &[u8]) -> &mut Self { | ||
| let mut input = input; | ||
| debug_assert!(self.len() + input.len() <= GROUP_SIZE); | ||
| while self.current.len() + input.len() > CHUNK_SIZE { | ||
| let remaining = CHUNK_SIZE - self.current.len(); | ||
| self.current.update(&input[..remaining]); | ||
| // we know this is not the root because there is more coming | ||
| self.leaf_hashes[self.hashes()] = self.current.finalize(false); | ||
| self.current_chunk += 1; | ||
| self.current = blake3::guts::ChunkState::new(self.current_chunk); | ||
| input = &input[remaining..]; | ||
| } | ||
| self.current.update(input); | ||
| self | ||
| } | ||
| pub fn finalize(&self, is_root: bool) -> Hash { | ||
| if self.hashes() == 0 { | ||
| // we have just current, so pass through is_root | ||
| self.current.finalize(is_root) | ||
| } else { | ||
| // todo: this works only for GROUP_CHUNKS == 2 | ||
| let mut leaf_hashes = [Hash::from([0; HASH_SIZE]); GROUP_CHUNKS]; | ||
| let n = self.hashes(); | ||
| leaf_hashes[..n].copy_from_slice(&self.leaf_hashes[..self.hashes()]); | ||
| leaf_hashes[n] = self.current.finalize(false); | ||
| combine_chunk_hashes(&leaf_hashes[..n + 1], is_root) | ||
| } | ||
| } | ||
| /// number of leaf hashes we have already computed | ||
| fn hashes(&self) -> usize { | ||
| (self.current_chunk & ((GROUP_CHUNKS as u64) - 1)) as usize | ||
| } | ||
| } | ||
| fn combine_chunk_hashes(chunks: &[Hash], is_root: bool) -> Hash { | ||
| if chunks.len() == 1 { | ||
| chunks[0] | ||
| } else { | ||
| let mid = chunks.len().next_power_of_two() / 2; | ||
| let left = combine_chunk_hashes(&chunks[..mid], false); | ||
| let right = combine_chunk_hashes(&chunks[mid..], false); | ||
| blake3::guts::parent_cv(&left, &right, is_root) | ||
| } | ||
| } |
+71
-47
@@ -83,2 +83,3 @@ #! /usr/bin/env python3 | ||
| CHUNK_SIZE = 1024 | ||
| GROUP_SIZE = 16 * CHUNK_SIZE | ||
| KEY_SIZE = 32 | ||
@@ -216,8 +217,31 @@ HASH_SIZE = 32 | ||
| # Verify a chunk chaining value with a constant-time comparison. | ||
| def verify_chunk(expected_cv, chunk_bytes, chunk_index, finalization): | ||
| found_cv = chunk_chaining_value(chunk_bytes, chunk_index, finalization) | ||
| assert hmac.compare_digest(expected_cv, found_cv), "hash mismatch" | ||
| # Left subtrees contain the largest possible power of two chunks, with at least | ||
| # one byte left for the right subtree. | ||
| def left_len(parent_len): | ||
| available_chunks = (parent_len - 1) // CHUNK_SIZE | ||
| power_of_two_chunks = 2 ** (available_chunks.bit_length() - 1) | ||
| return CHUNK_SIZE * power_of_two_chunks | ||
| # Compute the chaining value of a subtree recursively. Although we could use | ||
| # this to hash entire inputs in memory, in this implementation we only use it | ||
| # to hash chunk groups in group_chaining_value() immediately below. | ||
| def subtree_chaining_value(subtree_bytes, starting_chunk_index, finalization): | ||
| if len(subtree_bytes) <= CHUNK_SIZE: | ||
| return chunk_chaining_value(subtree_bytes, starting_chunk_index, finalization) | ||
| llen = left_len(len(subtree_bytes)) | ||
| chunk_index = starting_chunk_index | ||
| left_cv = subtree_chaining_value(subtree_bytes[:llen], chunk_index, NOT_ROOT) | ||
| chunk_index += llen // CHUNK_SIZE | ||
| right_cv = subtree_chaining_value(subtree_bytes[llen:], chunk_index, NOT_ROOT) | ||
| return parent_chaining_value(left_cv + right_cv, finalization) | ||
| # Compute the chaining value of a group of chunks, up to GROUP_SIZE bytes. | ||
| def group_chaining_value(group_bytes, group_index, finalization): | ||
| assert len(group_bytes) <= GROUP_SIZE | ||
| starting_chunk_index = group_index * (GROUP_SIZE // CHUNK_SIZE) | ||
| return subtree_chaining_value(group_bytes, starting_chunk_index, finalization) | ||
| # Verify a parent node chaining value with a constant-time comparison. | ||
@@ -229,2 +253,8 @@ def verify_parent(expected_cv, parent_bytes, finalization): | ||
| # Verify a chunk group chaining value with a constant-time comparison. | ||
| def verify_group(expected_cv, group_bytes, group_index, finalization): | ||
| found_cv = group_chaining_value(group_bytes, group_index, finalization) | ||
| assert hmac.compare_digest(expected_cv, found_cv), "hash mismatch" | ||
| # The standard read() function is allowed to return fewer bytes than requested | ||
@@ -252,20 +282,12 @@ # for a number of different reasons, including but not limited to EOF. To | ||
| # Left subtrees contain the largest possible power of two chunks, with at least | ||
| # one byte left for the right subtree. | ||
| def left_len(parent_len): | ||
| available_chunks = (parent_len - 1) // CHUNK_SIZE | ||
| power_of_two_chunks = 2 ** (available_chunks.bit_length() - 1) | ||
| return CHUNK_SIZE * power_of_two_chunks | ||
| def bao_encode(buf, *, outboard=False): | ||
| chunk_index = 0 | ||
| group_index = 0 | ||
| def encode_recurse(buf, finalization): | ||
| nonlocal chunk_index | ||
| if len(buf) <= CHUNK_SIZE: | ||
| chunk_cv = chunk_chaining_value(buf, chunk_index, finalization) | ||
| chunk_encoded = b"" if outboard else buf | ||
| chunk_index += 1 | ||
| return chunk_encoded, chunk_cv | ||
| nonlocal group_index | ||
| if len(buf) <= GROUP_SIZE: | ||
| group_cv = group_chaining_value(buf, group_index, finalization) | ||
| group_encoded = b"" if outboard else buf | ||
| group_index += 1 | ||
| return group_encoded, group_cv | ||
| llen = left_len(len(buf)) | ||
@@ -288,11 +310,11 @@ # Interior nodes have no len suffix. | ||
| tree_stream = outboard_stream or input_stream | ||
| chunk_index = 0 | ||
| group_index = 0 | ||
| def decode_recurse(subtree_cv, content_len, finalization): | ||
| nonlocal chunk_index | ||
| if content_len <= CHUNK_SIZE: | ||
| chunk = read_exact(input_stream, content_len) | ||
| verify_chunk(subtree_cv, chunk, chunk_index, finalization) | ||
| chunk_index += 1 | ||
| output_stream.write(chunk) | ||
| nonlocal group_index | ||
| if content_len <= GROUP_SIZE: | ||
| group = read_exact(input_stream, content_len) | ||
| verify_group(subtree_cv, group, group_index, finalization) | ||
| group_index += 1 | ||
| output_stream.write(group) | ||
| else: | ||
@@ -312,3 +334,5 @@ parent = read_exact(tree_stream, PARENT_SIZE) | ||
| # This is identical to the BLAKE3 hash function. | ||
| # This is identical to the BLAKE3 hash function. Note that this works in terms | ||
| # of chunks rather than groups, to emphasize that grouping/pruning doesn't | ||
| # affect the root hash. | ||
| def bao_hash(input_stream): | ||
@@ -348,13 +372,13 @@ buf = b"" | ||
| # Round up to the next full chunk, and remember that the empty tree still | ||
| # counts as one chunk. | ||
| def count_chunks(content_len): | ||
| # Round up to the next full group, and remember that the empty tree still | ||
| # counts as one group. | ||
| def count_groups(content_len): | ||
| if content_len == 0: | ||
| return 1 | ||
| return (content_len + CHUNK_SIZE - 1) // CHUNK_SIZE | ||
| return (content_len + GROUP_SIZE - 1) // GROUP_SIZE | ||
| # A subtree of N chunks always has N-1 parent nodes. | ||
| # A subtree of N groups always has N-1 parent nodes. | ||
| def encoded_subtree_size(content_len, outboard=False): | ||
| parents_size = PARENT_SIZE * (count_chunks(content_len) - 1) | ||
| parents_size = PARENT_SIZE * (count_groups(content_len) - 1) | ||
| return parents_size if outboard else parents_size + content_len | ||
@@ -392,8 +416,8 @@ | ||
| pass | ||
| elif subtree_len <= CHUNK_SIZE: | ||
| # The current subtree is just a chunk. Read the whole thing. The | ||
| elif subtree_len <= GROUP_SIZE: | ||
| # The current subtree is just one group. Read the whole thing. The | ||
| # recipient will need the whole thing to verify its hash, | ||
| # regardless of whether it overlaps slice_end. | ||
| chunk = read_exact(input_stream, subtree_len) | ||
| output_stream.write(chunk) | ||
| group = read_exact(input_stream, subtree_len) | ||
| output_stream.write(group) | ||
| else: | ||
@@ -436,3 +460,3 @@ # We need to read a parent node and recurse into the current | ||
| # Check content_len before skipping subtrees, to be sure we don't skip | ||
| # validating the empty chunk. | ||
| # validating the empty chunk / empty group. | ||
| if subtree_end <= slice_start and content_len > 0: | ||
@@ -444,12 +468,12 @@ # This subtree isn't part of the slice. Keep going. | ||
| pass | ||
| elif subtree_len <= CHUNK_SIZE: | ||
| # The current subtree is just a chunk. Verify the whole thing, and | ||
| # then output however many bytes we need. | ||
| chunk = read_exact(input_stream, subtree_len) | ||
| chunk_index = subtree_start // CHUNK_SIZE | ||
| verify_chunk(subtree_cv, chunk, chunk_index, finalization) | ||
| chunk_start = max(0, min(subtree_len, slice_start - subtree_start)) | ||
| chunk_end = max(0, min(subtree_len, slice_end - subtree_start)) | ||
| elif subtree_len <= GROUP_SIZE: | ||
| # The current subtree is just one group. Verify the whole thing, | ||
| # and then output however many bytes we need. | ||
| group = read_exact(input_stream, subtree_len) | ||
| group_index = subtree_start // GROUP_SIZE | ||
| verify_group(subtree_cv, group, group_index, finalization) | ||
| group_start = max(0, min(subtree_len, slice_start - subtree_start)) | ||
| group_end = max(0, min(subtree_len, slice_end - subtree_start)) | ||
| if not skip_output: | ||
| output_stream.write(chunk[chunk_start:chunk_end]) | ||
| output_stream.write(group[group_start:group_end]) | ||
| else: | ||
@@ -456,0 +480,0 @@ # We need to read a parent node and recurse into the current |
@@ -11,3 +11,3 @@ #! /usr/bin/env python3 | ||
| import bao | ||
| from bao import CHUNK_SIZE, HEADER_SIZE, PARENT_SIZE | ||
| from bao import CHUNK_SIZE, GROUP_SIZE, HEADER_SIZE, PARENT_SIZE | ||
| from generate_input import input_bytes | ||
@@ -21,12 +21,13 @@ | ||
| CHUNK_SIZE + 1, | ||
| 2 * CHUNK_SIZE - 1, | ||
| 2 * CHUNK_SIZE, | ||
| 2 * CHUNK_SIZE + 1, | ||
| 3 * CHUNK_SIZE - 1, | ||
| 3 * CHUNK_SIZE, | ||
| 3 * CHUNK_SIZE + 1, | ||
| # The first case that has chunks at three different depths. | ||
| 11 * CHUNK_SIZE, | ||
| GROUP_SIZE - 1, | ||
| GROUP_SIZE, | ||
| GROUP_SIZE + 1, | ||
| 2 * GROUP_SIZE, | ||
| 3 * GROUP_SIZE, | ||
| # The first case that has chunk groups at three different depths. | ||
| 11 * GROUP_SIZE, | ||
| # The first case that has a depth jump greater than one. | ||
| 13 * CHUNK_SIZE, | ||
| 13 * GROUP_SIZE, | ||
| ] | ||
@@ -49,6 +50,6 @@ | ||
| # Return the first byte of the header, of each parent, and of each chunk. | ||
| # Return the first byte of the header, of each parent, and of each chunk group. | ||
| def encode_corruption_points(content_len, outboard=False): | ||
| def recurse(subtree_start, subtree_len, offset, ret): | ||
| if subtree_len <= CHUNK_SIZE: | ||
| if subtree_len <= GROUP_SIZE: | ||
| if subtree_len != 0 and not outboard: | ||
@@ -95,3 +96,3 @@ ret.append(offset) | ||
| input_corruptions.append(corruption) | ||
| corruption += CHUNK_SIZE | ||
| corruption += GROUP_SIZE | ||
| fields = [ | ||
@@ -118,3 +119,3 @@ ("input_len", size), | ||
| offsets.append(offset) | ||
| offset += CHUNK_SIZE | ||
| offset += GROUP_SIZE | ||
| if size > 0: | ||
@@ -147,5 +148,5 @@ offsets.append(size - 1) | ||
| return 0 | ||
| elif subtree_len <= CHUNK_SIZE: | ||
| # The current subtree is a chunk. Add its first byte, as long as | ||
| # it's not the empty chunk. | ||
| elif subtree_len <= GROUP_SIZE: | ||
| # The current subtree is one chunk group. Add its first byte, as | ||
| # long as it's not the empty chunk. | ||
| if subtree_len != 0: | ||
@@ -183,3 +184,3 @@ ret.append(offset) | ||
| for offset in offsets: | ||
| for slice_len in [0, CHUNK_SIZE]: | ||
| for slice_len in [0, GROUP_SIZE]: | ||
| slice_bytes = io.BytesIO() | ||
@@ -186,0 +187,0 @@ bao.bao_slice(io.BytesIO(encoded), slice_bytes, offset, slice_len) |
+21
-20
| //! The tests in this file run bao against the standard set of test vectors. | ||
| use abao::Hash; | ||
| use abao as bao; | ||
| use bao::Hash; | ||
| use lazy_static::lazy_static; | ||
@@ -111,3 +112,3 @@ use serde::{Deserialize, Serialize}; | ||
| let input = make_input(case.input_len); | ||
| let (encoded, hash) = abao::encode::encode(&input); | ||
| let (encoded, hash) = bao::encode::encode(&input); | ||
| assert_eq!(&*case.bao_hash, &*hash.to_hex()); | ||
@@ -118,7 +119,7 @@ assert_eq!( | ||
| ); | ||
| let encoded_size = abao::encode::encoded_size(case.input_len as u64) as usize; | ||
| let encoded_size = bao::encode::encoded_size(case.input_len as u64) as usize; | ||
| assert_eq!(encoded_size, encoded.len()); | ||
| // Test decoding. | ||
| let output = abao::decode::decode(&encoded, &hash).unwrap(); | ||
| let output = bao::decode::decode(&encoded, &hash).unwrap(); | ||
| assert_eq!(input, output); | ||
@@ -128,3 +129,3 @@ | ||
| let bad_hash = corrupt_hash(&hash); | ||
| let err = abao::decode::decode(&encoded, &bad_hash).unwrap_err(); | ||
| let err = bao::decode::decode(&encoded, &bad_hash).unwrap_err(); | ||
| assert_eq!(std::io::ErrorKind::InvalidData, err.kind()); | ||
@@ -139,3 +140,3 @@ | ||
| // was corrupted. | ||
| abao::decode::decode(&corrupt, &hash).unwrap_err(); | ||
| bao::decode::decode(&corrupt, &hash).unwrap_err(); | ||
| } | ||
@@ -146,3 +147,3 @@ } | ||
| fn decode_outboard(input: &[u8], outboard: &[u8], hash: &Hash) -> io::Result<Vec<u8>> { | ||
| let mut reader = abao::decode::Decoder::new_outboard(input, outboard, hash); | ||
| let mut reader = bao::decode::Decoder::new_outboard(input, outboard, hash); | ||
| let mut output = Vec::with_capacity(input.len()); | ||
@@ -158,3 +159,3 @@ reader.read_to_end(&mut output)?; | ||
| let input = make_input(case.input_len); | ||
| let (outboard, hash) = abao::encode::outboard(&input); | ||
| let (outboard, hash) = bao::encode::outboard(&input); | ||
| assert_eq!(&*case.bao_hash, &*hash.to_hex()); | ||
@@ -165,3 +166,3 @@ assert_eq!( | ||
| ); | ||
| let outboard_size = abao::encode::outboard_size(case.input_len as u64) as usize; | ||
| let outboard_size = bao::encode::outboard_size(case.input_len as u64) as usize; | ||
| assert_eq!(outboard_size, outboard.len()); | ||
@@ -204,4 +205,4 @@ | ||
| let input = make_input(case.input_len); | ||
| let (encoded, hash) = abao::encode::encode(&input); | ||
| let (outboard, outboard_hash) = abao::encode::outboard(&input); | ||
| let (encoded, hash) = bao::encode::encode(&input); | ||
| let (outboard, outboard_hash) = bao::encode::outboard(&input); | ||
| assert_eq!(hash, outboard_hash); | ||
@@ -217,3 +218,3 @@ | ||
| // Test seeking in the combined mode. | ||
| let mut combined_reader = abao::decode::Decoder::new(Cursor::new(&encoded), &hash); | ||
| let mut combined_reader = bao::decode::Decoder::new(Cursor::new(&encoded), &hash); | ||
| combined_reader | ||
@@ -227,3 +228,3 @@ .seek(io::SeekFrom::Start(seek as u64)) | ||
| // Test seeking in the outboard mode. | ||
| let mut outboard_reader = abao::decode::Decoder::new_outboard( | ||
| let mut outboard_reader = bao::decode::Decoder::new_outboard( | ||
| Cursor::new(&input), | ||
@@ -251,5 +252,5 @@ Cursor::new(&outboard), | ||
| } | ||
| let mut combined_reader = abao::decode::Decoder::new(Cursor::new(&encoded), &hash); | ||
| let mut combined_reader = bao::decode::Decoder::new(Cursor::new(&encoded), &hash); | ||
| let mut outboard_reader = | ||
| abao::decode::Decoder::new_outboard(Cursor::new(&input), Cursor::new(&outboard), &hash); | ||
| bao::decode::Decoder::new_outboard(Cursor::new(&input), Cursor::new(&outboard), &hash); | ||
| println!(); | ||
@@ -284,3 +285,3 @@ for &seek in &repeated_seeks { | ||
| fn decode_slice(slice: &[u8], hash: &Hash, start: u64, len: u64) -> io::Result<Vec<u8>> { | ||
| let mut reader = abao::decode::SliceDecoder::new(slice, hash, start, len); | ||
| let mut reader = bao::decode::SliceDecoder::new(slice, hash, start, len); | ||
| let mut output = Vec::new(); | ||
@@ -296,4 +297,4 @@ reader.read_to_end(&mut output)?; | ||
| let input = make_input(case.input_len); | ||
| let (encoded, hash) = abao::encode::encode(&input); | ||
| let (outboard, outboard_hash) = abao::encode::outboard(&input); | ||
| let (encoded, hash) = bao::encode::encode(&input); | ||
| let (outboard, outboard_hash) = bao::encode::outboard(&input); | ||
| assert_eq!(hash, outboard_hash); | ||
@@ -309,3 +310,3 @@ | ||
| let mut combined_extractor = | ||
| abao::encode::SliceExtractor::new(Cursor::new(&encoded), slice.start, slice.len); | ||
| bao::encode::SliceExtractor::new(Cursor::new(&encoded), slice.start, slice.len); | ||
| let mut combined_slice = Vec::new(); | ||
@@ -320,3 +321,3 @@ combined_extractor.read_to_end(&mut combined_slice).unwrap(); | ||
| // Make sure slicing the outboard encoding also gives the right output. | ||
| let mut outboard_extractor = abao::encode::SliceExtractor::new_outboard( | ||
| let mut outboard_extractor = bao::encode::SliceExtractor::new_outboard( | ||
| Cursor::new(&input), | ||
@@ -323,0 +324,0 @@ Cursor::new(&outboard), |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display