abao - crates.io Package Compare versions

+1

-1

.cargo_vcs_info.json

		{
		"git": {
		"sha1": "42b1c394e516ad4b12edd3090102f841d95dfda7"
		"sha1": "9b756ec8097afc782d76f7aec0a5ac9f4b82329a"
		},
		"path_in_vcs": ""
		}

+16

-2

Cargo.toml

		@@ -15,3 +15,3 @@ # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
		name = "abao"
		version = "0.1.2"
		version = "0.2.0"
		authors = [
		@@ -78,3 +78,17 @@ "Jack O'Connor",
		[features]
		default = ["tokio_io"]
		default = [
		"tokio_io",
		"group_size_16k",
		]
		group_size_1024k = []
		group_size_128k = []
		group_size_16k = []
		group_size_1k = []
		group_size_256k = []
		group_size_2k = []
		group_size_32k = []
		group_size_4k = []
		group_size_512k = []
		group_size_64k = []
		group_size_8k = []
		tokio_io = [
		@@ -81,0 +95,0 @@ "tokio",

+10

-0

README.md

		This is a fork of [https://github.com/oconnor663/bao] that adds async support and chunk groups.

		# Chunk groups

		As an extension of the original bao crate, this crate implements chunk groups.
		Chunk groups can be selected using a (mutually exclusive) feature flag.

		Supported chunk groups are from 1kb (1 chunk per group, compatible with the
		orininal bao crate) to 1024kb (2^10 chunks per group).

		The default is 16kb, so 2^4 chunks per group.

		# <a href="#"><img src="docs/bao.svg" alt="Bao" height=100></a>   [![Actions Status](https://github.com/n0-computer/abao/workflows/tests/badge.svg)](https://github.com/n0-computer/abao/actions) [![docs.rs](https://docs.rs/abao/badge.svg)](https://docs.rs/abao) [![crates.io](https://img.shields.io/crates/v/abao.svg)](https://crates.io/crates/abao)
		@@ -4,0 +14,0 @@

+40

-38

src/encode.rs

		@@ -40,3 +40,5 @@ //! Encode some input bytes into the Bao format, or slice an existing encoding.
		use crate::Finalization::{self, NotRoot, Root};
		use crate::{Hash, ParentNode, CHUNK_SIZE, HASH_SIZE, HEADER_SIZE, MAX_DEPTH, PARENT_SIZE};
		use crate::{
		ChunkGroupState, Hash, ParentNode, GROUP_SIZE, HASH_SIZE, HEADER_SIZE, MAX_DEPTH, PARENT_SIZE,
		};
		use arrayref::array_mut_ref;
		@@ -101,4 +103,4 @@ use arrayvec::ArrayVec;
		// want to overflow when content_len is u64::MAX_VALUE.
		let full_chunks: u64 = content_len / CHUNK_SIZE as u64;
		let has_partial_chunk: bool = (content_len % CHUNK_SIZE as u64) != 0;
		let full_chunks: u64 = content_len / GROUP_SIZE as u64;
		let has_partial_chunk: bool = (content_len % GROUP_SIZE as u64) != 0;
		cmp::max(1, full_chunks + has_partial_chunk as u64)
		@@ -108,4 +110,4 @@ }
		pub(crate) fn chunk_size(chunk_index: u64, content_len: u64) -> usize {
		let chunk_start = chunk_index * CHUNK_SIZE as u64;
		cmp::min(CHUNK_SIZE, (content_len - chunk_start) as usize)
		let chunk_start = chunk_index * GROUP_SIZE as u64;
		cmp::min(GROUP_SIZE, (content_len - chunk_start) as usize)
		}
		@@ -317,3 +319,3 @@
		fn needs_merge(&self) -> bool {
		let chunks = self.total_len / CHUNK_SIZE as u64;
		let chunks = self.total_len / GROUP_SIZE as u64;
		self.subtrees.len() > chunks.count_ones() as usize
		@@ -416,3 +418,3 @@ }
		inner: T,
		chunk_state: blake3::guts::ChunkState,
		chunk_state: ChunkGroupState,
		tree_state: State,
		@@ -430,3 +432,3 @@ outboard: bool,
		inner,
		chunk_state: blake3::guts::ChunkState::new(0),
		chunk_state: ChunkGroupState::new(0),
		tree_state: State::new(),
		@@ -530,3 +532,3 @@ outboard: false,
		if !self.outboard {
		let mut chunk = [0; CHUNK_SIZE];
		let mut chunk = [0; GROUP_SIZE];
		self.inner
		@@ -565,8 +567,8 @@ .seek(SeekFrom::Start(read_cursor - size as u64))?;
		// the tree state, and write out any completed parent nodes.
		if self.chunk_state.len() == CHUNK_SIZE {
		if self.chunk_state.len() == GROUP_SIZE {
		// This can't be the root, because we know more input is coming.
		let chunk_hash = self.chunk_state.finalize(false);
		self.tree_state.push_subtree(&chunk_hash, CHUNK_SIZE);
		let chunk_counter = self.tree_state.count() / CHUNK_SIZE as u64;
		self.chunk_state = blake3::guts::ChunkState::new(chunk_counter);
		self.tree_state.push_subtree(&chunk_hash, GROUP_SIZE);
		let chunk_counter = self.tree_state.count() / GROUP_SIZE as u64;
		self.chunk_state = ChunkGroupState::new(chunk_counter);
		while let Some(parent) = self.tree_state.merge_parent() {
		@@ -578,3 +580,3 @@ self.inner.write_all(&parent)?;
		// Add as many bytes as possible to the current chunk.
		let want = CHUNK_SIZE - self.chunk_state.len();
		let want = GROUP_SIZE - self.chunk_state.len();
		let take = cmp::min(want, input.len());
		@@ -632,3 +634,3 @@ if !self.outboard {
		fn at_root(&self) -> bool {
		self.content_position < CHUNK_SIZE as u64 && self.stack_depth == 1
		self.content_position < GROUP_SIZE as u64 && self.stack_depth == 1
		}
		@@ -660,3 +662,3 @@
		debug_assert!(!self.at_eof(), "not valid at EOF");
		self.content_position - (self.content_position % CHUNK_SIZE as u64)
		self.content_position - (self.content_position % GROUP_SIZE as u64)
		}
		@@ -666,3 +668,3 @@
		debug_assert!(!self.at_eof(), "not valid at EOF");
		self.content_position / CHUNK_SIZE as u64
		self.content_position / GROUP_SIZE as u64
		}
		@@ -713,4 +715,4 @@
		finalization: self.finalization(),
		skip: (self.content_position % CHUNK_SIZE as u64) as usize,
		index: self.content_position / CHUNK_SIZE as u64,
		skip: (self.content_position % GROUP_SIZE as u64) as usize,
		index: self.content_position / GROUP_SIZE as u64,
		}
		@@ -803,3 +805,3 @@ }
		let distance = seek_to - self.next_chunk_start();
		if distance < CHUNK_SIZE as u64 {
		if distance < GROUP_SIZE as u64 {
		if verifying_final_chunk {
		@@ -811,3 +813,3 @@ let size = (content_len - self.next_chunk_start()) as usize;
		skip: size, // Skip the whole thing.
		index: self.content_position / CHUNK_SIZE as u64,
		index: self.content_position / GROUP_SIZE as u64,
		};
		@@ -826,3 +828,3 @@ } else {
		.unwrap_or(0);
		if downshifted_distance < CHUNK_SIZE as u64 {
		if downshifted_distance < GROUP_SIZE as u64 {
		debug_assert!(self.upcoming_parents > 0);
		@@ -836,3 +838,3 @@ return NextRead::Parent;
		// this case.
		let subtree_size = (CHUNK_SIZE as u64) << self.upcoming_parents;
		let subtree_size = (GROUP_SIZE as u64) << self.upcoming_parents;
		self.content_position = self.next_chunk_start() + subtree_size;
		@@ -895,3 +897,3 @@ self.encoding_position += encoded_subtree_size(subtree_size);
		let size = chunk_size(self.next_chunk_index(), content_len);
		let skip = self.content_position % CHUNK_SIZE as u64;
		let skip = self.content_position % GROUP_SIZE as u64;
		self.content_position += size as u64 - skip;
		@@ -1031,3 +1033,3 @@ self.encoding_position += size as u128;
		/// // The slice includes some overhead to store the necessary subtree hashes.
		/// assert_eq!(9096, slice.len());
		/// assert_eq!(16776, slice.len());
		/// # Ok(())
		@@ -1044,3 +1046,3 @@ /// # }
		parser: ParseState,
		buf: [u8; CHUNK_SIZE],
		buf: [u8; GROUP_SIZE],
		buf_start: usize,
		@@ -1085,3 +1087,3 @@ buf_end: usize,
		parser: ParseState::new(),
		buf: [0; CHUNK_SIZE],
		buf: [0; GROUP_SIZE],
		buf_start: 0,
		@@ -1224,3 +1226,3 @@ buf_end: 0,
		use super::*;
		use crate::decode::make_test_input;
		use crate::{decode::make_test_input, GROUP_LOG};

		@@ -1296,3 +1298,3 @@ #[test]
		for total_chunks in 1..100 {
		let content_len = total_chunks * CHUNK_SIZE as u64;
		let content_len = total_chunks * GROUP_SIZE as u64;
		let pre_post_list = make_pre_post_list(total_chunks);
		@@ -1320,12 +1322,12 @@ for chunk in 0..total_chunks {
		fn drive_state(mut input: &[u8]) -> Hash {
		let last_chunk_is_root = input.len() <= CHUNK_SIZE;
		let last_chunk_is_root = input.len() <= GROUP_SIZE;
		let mut state = State::new();
		let mut chunk_index = 0;
		while input.len() > CHUNK_SIZE {
		let hash = blake3::guts::ChunkState::new(chunk_index)
		.update(&input[..CHUNK_SIZE])
		while input.len() > GROUP_SIZE {
		let hash = ChunkGroupState::new(chunk_index)
		.update(&input[..GROUP_SIZE])
		.finalize(false);
		chunk_index += 1;
		state.push_subtree(&hash, CHUNK_SIZE);
		input = &input[CHUNK_SIZE..];
		state.push_subtree(&hash, GROUP_SIZE);
		input = &input[GROUP_SIZE..];
		// Merge any parents, but throw away the result. We don't need
		@@ -1335,3 +1337,3 @@ // them, but we need to avoid tripping an assert.
		}
		let hash = blake3::guts::ChunkState::new(chunk_index)
		let hash = ChunkGroupState::new(chunk_index)
		.update(input)
		@@ -1354,3 +1356,3 @@ .finalize(last_chunk_is_root);
		fn test_state() {
		let buf = [0x42; 65537];
		let buf = vec![0x42; 65537 << GROUP_LOG];
		for &case in crate::test::TEST_CASES {
		@@ -1404,3 +1406,3 @@ dbg!(case);
		fn test_empty_write_after_one_chunk() {
		let input = &[0; CHUNK_SIZE];
		let input = &[0; GROUP_SIZE];
		let mut output = Vec::new();
		@@ -1407,0 +1409,0 @@ let mut encoder = Encoder::new(io::Cursor::new(&mut output));

+120

-20

src/lib.rs

		@@ -53,6 +53,33 @@ //! [Repo](https://github.com/oconnor663/bao) —
		pub const HASH_SIZE: usize = 32;
		/// log2(GROUP_CHUNKS)
		#[cfg(feature = "group_size_1k")]
		pub(crate) const GROUP_LOG: usize = 0;
		#[cfg(feature = "group_size_2k")]
		pub(crate) const GROUP_LOG: usize = 1;
		#[cfg(feature = "group_size_4k")]
		pub(crate) const GROUP_LOG: usize = 2;
		#[cfg(feature = "group_size_8k")]
		pub(crate) const GROUP_LOG: usize = 3;
		#[cfg(feature = "group_size_16k")]
		pub(crate) const GROUP_LOG: usize = 4;
		#[cfg(feature = "group_size_32k")]
		pub(crate) const GROUP_LOG: usize = 5;
		#[cfg(feature = "group_size_64k")]
		pub(crate) const GROUP_LOG: usize = 6;
		#[cfg(feature = "group_size_128k")]
		pub(crate) const GROUP_LOG: usize = 7;
		#[cfg(feature = "group_size_256k")]
		pub(crate) const GROUP_LOG: usize = 8;
		#[cfg(feature = "group_size_512k")]
		pub(crate) const GROUP_LOG: usize = 9;
		#[cfg(feature = "group_size_1024k")]
		pub(crate) const GROUP_LOG: usize = 10;
		/// The number of chunks in a chunk groups. Must be a power of 2.
		pub(crate) const GROUP_CHUNKS: usize = 1 << GROUP_LOG;
		/// The size of a chunk group in bytes.
		pub(crate) const GROUP_SIZE: usize = GROUP_CHUNKS * CHUNK_SIZE;
		pub(crate) const PARENT_SIZE: usize = 2 * HASH_SIZE;
		pub(crate) const HEADER_SIZE: usize = 8;
		pub(crate) const CHUNK_SIZE: usize = 1024;
		pub(crate) const MAX_DEPTH: usize = 54; // 2^54 * CHUNK_SIZE = 2^64
		const CHUNK_SIZE: usize = 1024;
		pub(crate) const MAX_DEPTH: usize = 64 - (GROUP_LOG + 10); // chunk size is 2^10

		@@ -104,21 +131,94 @@ /// An array of `HASH_SIZE` bytes. This will be a wrapper type in a future version.
		10,
		CHUNK_SIZE - 1,
		CHUNK_SIZE,
		CHUNK_SIZE + 1,
		2 * CHUNK_SIZE - 1,
		2 * CHUNK_SIZE,
		2 * CHUNK_SIZE + 1,
		3 * CHUNK_SIZE - 1,
		3 * CHUNK_SIZE,
		3 * CHUNK_SIZE + 1,
		4 * CHUNK_SIZE - 1,
		4 * CHUNK_SIZE,
		4 * CHUNK_SIZE + 1,
		8 * CHUNK_SIZE - 1,
		8 * CHUNK_SIZE,
		8 * CHUNK_SIZE + 1,
		16 * CHUNK_SIZE - 1,
		16 * CHUNK_SIZE,
		16 * CHUNK_SIZE + 1,
		GROUP_SIZE - 1,
		GROUP_SIZE,
		GROUP_SIZE + 1,
		2 * GROUP_SIZE - 1,
		2 * GROUP_SIZE,
		2 * GROUP_SIZE + 1,
		3 * GROUP_SIZE - 1,
		3 * GROUP_SIZE,
		3 * GROUP_SIZE + 1,
		4 * GROUP_SIZE - 1,
		4 * GROUP_SIZE,
		4 * GROUP_SIZE + 1,
		8 * GROUP_SIZE - 1,
		8 * GROUP_SIZE,
		8 * GROUP_SIZE + 1,
		16 * GROUP_SIZE - 1,
		16 * GROUP_SIZE,
		16 * GROUP_SIZE + 1,
		];
		}

		/// A state machine for hashing a chunk group, with the same API as
		/// `blake3::guts::ChunkState`. This really should not exist but instead
		/// call into blake3, but the required methods are not public.
		///
		/// See https://github.com/BLAKE3-team/BLAKE3/tree/more_guts
		#[derive(Clone, Debug)]
		pub(crate) struct ChunkGroupState {
		current_chunk: u64,
		leaf_hashes: [Hash; GROUP_CHUNKS - 1],
		current: blake3::guts::ChunkState,
		}

		impl ChunkGroupState {
		pub fn new(group_counter: u64) -> Self {
		let chunk_counter = group_counter << GROUP_LOG;
		Self {
		current_chunk: chunk_counter,
		current: blake3::guts::ChunkState::new(chunk_counter),
		leaf_hashes: [Hash::from([0; HASH_SIZE]); GROUP_CHUNKS - 1],
		}
		}

		pub fn len(&self) -> usize {
		self.hashes() * CHUNK_SIZE + self.current.len()
		}

		pub fn update(&mut self, input: &[u8]) -> &mut Self {
		let mut input = input;
		debug_assert!(self.len() + input.len() <= GROUP_SIZE);
		while self.current.len() + input.len() > CHUNK_SIZE {
		let remaining = CHUNK_SIZE - self.current.len();
		self.current.update(&input[..remaining]);
		// we know this is not the root because there is more coming
		self.leaf_hashes[self.hashes()] = self.current.finalize(false);
		self.current_chunk += 1;
		self.current = blake3::guts::ChunkState::new(self.current_chunk);
		input = &input[remaining..];
		}
		self.current.update(input);
		self
		}

		pub fn finalize(&self, is_root: bool) -> Hash {
		if self.hashes() == 0 {
		// we have just current, so pass through is_root
		self.current.finalize(is_root)
		} else {
		// todo: this works only for GROUP_CHUNKS == 2
		let mut leaf_hashes = [Hash::from([0; HASH_SIZE]); GROUP_CHUNKS];
		let n = self.hashes();
		leaf_hashes[..n].copy_from_slice(&self.leaf_hashes[..self.hashes()]);
		leaf_hashes[n] = self.current.finalize(false);
		combine_chunk_hashes(&leaf_hashes[..n + 1], is_root)
		}
		}

		/// number of leaf hashes we have already computed
		fn hashes(&self) -> usize {
		(self.current_chunk & ((GROUP_CHUNKS as u64) - 1)) as usize
		}
		}

		fn combine_chunk_hashes(chunks: &[Hash], is_root: bool) -> Hash {
		if chunks.len() == 1 {
		chunks[0]
		} else {
		let mid = chunks.len().next_power_of_two() / 2;
		let left = combine_chunk_hashes(&chunks[..mid], false);
		let right = combine_chunk_hashes(&chunks[mid..], false);
		blake3::guts::parent_cv(&left, &right, is_root)
		}
		}

+71

-47

tests/bao.py

		@@ -83,2 +83,3 @@ #! /usr/bin/env python3
		CHUNK_SIZE = 1024
		GROUP_SIZE = 16 * CHUNK_SIZE
		KEY_SIZE = 32
		@@ -216,8 +217,31 @@ HASH_SIZE = 32

		# Verify a chunk chaining value with a constant-time comparison.
		def verify_chunk(expected_cv, chunk_bytes, chunk_index, finalization):
		found_cv = chunk_chaining_value(chunk_bytes, chunk_index, finalization)
		assert hmac.compare_digest(expected_cv, found_cv), "hash mismatch"
		# Left subtrees contain the largest possible power of two chunks, with at least
		# one byte left for the right subtree.
		def left_len(parent_len):
		available_chunks = (parent_len - 1) // CHUNK_SIZE
		power_of_two_chunks = 2 ** (available_chunks.bit_length() - 1)
		return CHUNK_SIZE * power_of_two_chunks


		# Compute the chaining value of a subtree recursively. Although we could use
		# this to hash entire inputs in memory, in this implementation we only use it
		# to hash chunk groups in group_chaining_value() immediately below.
		def subtree_chaining_value(subtree_bytes, starting_chunk_index, finalization):
		if len(subtree_bytes) <= CHUNK_SIZE:
		return chunk_chaining_value(subtree_bytes, starting_chunk_index, finalization)
		llen = left_len(len(subtree_bytes))
		chunk_index = starting_chunk_index
		left_cv = subtree_chaining_value(subtree_bytes[:llen], chunk_index, NOT_ROOT)
		chunk_index += llen // CHUNK_SIZE
		right_cv = subtree_chaining_value(subtree_bytes[llen:], chunk_index, NOT_ROOT)
		return parent_chaining_value(left_cv + right_cv, finalization)


		# Compute the chaining value of a group of chunks, up to GROUP_SIZE bytes.
		def group_chaining_value(group_bytes, group_index, finalization):
		assert len(group_bytes) <= GROUP_SIZE
		starting_chunk_index = group_index * (GROUP_SIZE // CHUNK_SIZE)
		return subtree_chaining_value(group_bytes, starting_chunk_index, finalization)


		# Verify a parent node chaining value with a constant-time comparison.
		@@ -229,2 +253,8 @@ def verify_parent(expected_cv, parent_bytes, finalization):

		# Verify a chunk group chaining value with a constant-time comparison.
		def verify_group(expected_cv, group_bytes, group_index, finalization):
		found_cv = group_chaining_value(group_bytes, group_index, finalization)
		assert hmac.compare_digest(expected_cv, found_cv), "hash mismatch"


		# The standard read() function is allowed to return fewer bytes than requested
		@@ -252,20 +282,12 @@ # for a number of different reasons, including but not limited to EOF. To

		# Left subtrees contain the largest possible power of two chunks, with at least
		# one byte left for the right subtree.
		def left_len(parent_len):
		available_chunks = (parent_len - 1) // CHUNK_SIZE
		power_of_two_chunks = 2 ** (available_chunks.bit_length() - 1)
		return CHUNK_SIZE * power_of_two_chunks


		def bao_encode(buf, *, outboard=False):
		chunk_index = 0
		group_index = 0

		def encode_recurse(buf, finalization):
		nonlocal chunk_index
		if len(buf) <= CHUNK_SIZE:
		chunk_cv = chunk_chaining_value(buf, chunk_index, finalization)
		chunk_encoded = b"" if outboard else buf
		chunk_index += 1
		return chunk_encoded, chunk_cv
		nonlocal group_index
		if len(buf) <= GROUP_SIZE:
		group_cv = group_chaining_value(buf, group_index, finalization)
		group_encoded = b"" if outboard else buf
		group_index += 1
		return group_encoded, group_cv
		llen = left_len(len(buf))
		@@ -288,11 +310,11 @@ # Interior nodes have no len suffix.
		tree_stream = outboard_stream or input_stream
		chunk_index = 0
		group_index = 0

		def decode_recurse(subtree_cv, content_len, finalization):
		nonlocal chunk_index
		if content_len <= CHUNK_SIZE:
		chunk = read_exact(input_stream, content_len)
		verify_chunk(subtree_cv, chunk, chunk_index, finalization)
		chunk_index += 1
		output_stream.write(chunk)
		nonlocal group_index
		if content_len <= GROUP_SIZE:
		group = read_exact(input_stream, content_len)
		verify_group(subtree_cv, group, group_index, finalization)
		group_index += 1
		output_stream.write(group)
		else:
		@@ -312,3 +334,5 @@ parent = read_exact(tree_stream, PARENT_SIZE)

		# This is identical to the BLAKE3 hash function.
		# This is identical to the BLAKE3 hash function. Note that this works in terms
		# of chunks rather than groups, to emphasize that grouping/pruning doesn't
		# affect the root hash.
		def bao_hash(input_stream):
		@@ -348,13 +372,13 @@ buf = b""

		# Round up to the next full chunk, and remember that the empty tree still
		# counts as one chunk.
		def count_chunks(content_len):
		# Round up to the next full group, and remember that the empty tree still
		# counts as one group.
		def count_groups(content_len):
		if content_len == 0:
		return 1
		return (content_len + CHUNK_SIZE - 1) // CHUNK_SIZE
		return (content_len + GROUP_SIZE - 1) // GROUP_SIZE


		# A subtree of N chunks always has N-1 parent nodes.
		# A subtree of N groups always has N-1 parent nodes.
		def encoded_subtree_size(content_len, outboard=False):
		parents_size = PARENT_SIZE * (count_chunks(content_len) - 1)
		parents_size = PARENT_SIZE * (count_groups(content_len) - 1)
		return parents_size if outboard else parents_size + content_len
		@@ -392,8 +416,8 @@
		pass
		elif subtree_len <= CHUNK_SIZE:
		# The current subtree is just a chunk. Read the whole thing. The
		elif subtree_len <= GROUP_SIZE:
		# The current subtree is just one group. Read the whole thing. The
		# recipient will need the whole thing to verify its hash,
		# regardless of whether it overlaps slice_end.
		chunk = read_exact(input_stream, subtree_len)
		output_stream.write(chunk)
		group = read_exact(input_stream, subtree_len)
		output_stream.write(group)
		else:
		@@ -436,3 +460,3 @@ # We need to read a parent node and recurse into the current
		# Check content_len before skipping subtrees, to be sure we don't skip
		# validating the empty chunk.
		# validating the empty chunk / empty group.
		if subtree_end <= slice_start and content_len > 0:
		@@ -444,12 +468,12 @@ # This subtree isn't part of the slice. Keep going.
		pass
		elif subtree_len <= CHUNK_SIZE:
		# The current subtree is just a chunk. Verify the whole thing, and
		# then output however many bytes we need.
		chunk = read_exact(input_stream, subtree_len)
		chunk_index = subtree_start // CHUNK_SIZE
		verify_chunk(subtree_cv, chunk, chunk_index, finalization)
		chunk_start = max(0, min(subtree_len, slice_start - subtree_start))
		chunk_end = max(0, min(subtree_len, slice_end - subtree_start))
		elif subtree_len <= GROUP_SIZE:
		# The current subtree is just one group. Verify the whole thing,
		# and then output however many bytes we need.
		group = read_exact(input_stream, subtree_len)
		group_index = subtree_start // GROUP_SIZE
		verify_group(subtree_cv, group, group_index, finalization)
		group_start = max(0, min(subtree_len, slice_start - subtree_start))
		group_end = max(0, min(subtree_len, slice_end - subtree_start))
		if not skip_output:
		output_stream.write(chunk[chunk_start:chunk_end])
		output_stream.write(group[group_start:group_end])
		else:
		@@ -456,0 +480,0 @@ # We need to read a parent node and recurse into the current

+17

-16

tests/generate_vectors.py

		@@ -11,3 +11,3 @@ #! /usr/bin/env python3
		import bao
		from bao import CHUNK_SIZE, HEADER_SIZE, PARENT_SIZE
		from bao import CHUNK_SIZE, GROUP_SIZE, HEADER_SIZE, PARENT_SIZE
		from generate_input import input_bytes
		@@ -21,12 +21,13 @@
		CHUNK_SIZE + 1,
		2 * CHUNK_SIZE - 1,
		2 * CHUNK_SIZE,
		2 * CHUNK_SIZE + 1,
		3 * CHUNK_SIZE - 1,
		3 * CHUNK_SIZE,
		3 * CHUNK_SIZE + 1,
		# The first case that has chunks at three different depths.
		11 * CHUNK_SIZE,
		GROUP_SIZE - 1,
		GROUP_SIZE,
		GROUP_SIZE + 1,
		2 * GROUP_SIZE,
		3 * GROUP_SIZE,
		# The first case that has chunk groups at three different depths.
		11 * GROUP_SIZE,
		# The first case that has a depth jump greater than one.
		13 * CHUNK_SIZE,
		13 * GROUP_SIZE,
		]
		@@ -49,6 +50,6 @@

		# Return the first byte of the header, of each parent, and of each chunk.
		# Return the first byte of the header, of each parent, and of each chunk group.
		def encode_corruption_points(content_len, outboard=False):
		def recurse(subtree_start, subtree_len, offset, ret):
		if subtree_len <= CHUNK_SIZE:
		if subtree_len <= GROUP_SIZE:
		if subtree_len != 0 and not outboard:
		@@ -95,3 +96,3 @@ ret.append(offset)
		input_corruptions.append(corruption)
		corruption += CHUNK_SIZE
		corruption += GROUP_SIZE
		fields = [
		@@ -118,3 +119,3 @@ ("input_len", size),
		offsets.append(offset)
		offset += CHUNK_SIZE
		offset += GROUP_SIZE
		if size > 0:
		@@ -147,5 +148,5 @@ offsets.append(size - 1)
		return 0
		elif subtree_len <= CHUNK_SIZE:
		# The current subtree is a chunk. Add its first byte, as long as
		# it's not the empty chunk.
		elif subtree_len <= GROUP_SIZE:
		# The current subtree is one chunk group. Add its first byte, as
		# long as it's not the empty chunk.
		if subtree_len != 0:
		@@ -183,3 +184,3 @@ ret.append(offset)
		for offset in offsets:
		for slice_len in [0, CHUNK_SIZE]:
		for slice_len in [0, GROUP_SIZE]:
		slice_bytes = io.BytesIO()
		@@ -186,0 +187,0 @@ bao.bao_slice(io.BytesIO(encoded), slice_bytes, offset, slice_len)

+21

-20

tests/vector_tests.rs

		//! The tests in this file run bao against the standard set of test vectors.

		use abao::Hash;
		use abao as bao;
		use bao::Hash;
		use lazy_static::lazy_static;
		@@ -111,3 +112,3 @@ use serde::{Deserialize, Serialize};
		let input = make_input(case.input_len);
		let (encoded, hash) = abao::encode::encode(&input);
		let (encoded, hash) = bao::encode::encode(&input);
		assert_eq!(&case.bao_hash, &hash.to_hex());
		@@ -118,7 +119,7 @@ assert_eq!(
		);
		let encoded_size = abao::encode::encoded_size(case.input_len as u64) as usize;
		let encoded_size = bao::encode::encoded_size(case.input_len as u64) as usize;
		assert_eq!(encoded_size, encoded.len());

		// Test decoding.
		let output = abao::decode::decode(&encoded, &hash).unwrap();
		let output = bao::decode::decode(&encoded, &hash).unwrap();
		assert_eq!(input, output);
		@@ -128,3 +129,3 @@
		let bad_hash = corrupt_hash(&hash);
		let err = abao::decode::decode(&encoded, &bad_hash).unwrap_err();
		let err = bao::decode::decode(&encoded, &bad_hash).unwrap_err();
		assert_eq!(std::io::ErrorKind::InvalidData, err.kind());
		@@ -139,3 +140,3 @@
		// was corrupted.
		abao::decode::decode(&corrupt, &hash).unwrap_err();
		bao::decode::decode(&corrupt, &hash).unwrap_err();
		}
		@@ -146,3 +147,3 @@ }
		fn decode_outboard(input: &[u8], outboard: &[u8], hash: &Hash) -> io::Result<Vec<u8>> {
		let mut reader = abao::decode::Decoder::new_outboard(input, outboard, hash);
		let mut reader = bao::decode::Decoder::new_outboard(input, outboard, hash);
		let mut output = Vec::with_capacity(input.len());
		@@ -158,3 +159,3 @@ reader.read_to_end(&mut output)?;
		let input = make_input(case.input_len);
		let (outboard, hash) = abao::encode::outboard(&input);
		let (outboard, hash) = bao::encode::outboard(&input);
		assert_eq!(&case.bao_hash, &hash.to_hex());
		@@ -165,3 +166,3 @@ assert_eq!(
		);
		let outboard_size = abao::encode::outboard_size(case.input_len as u64) as usize;
		let outboard_size = bao::encode::outboard_size(case.input_len as u64) as usize;
		assert_eq!(outboard_size, outboard.len());
		@@ -204,4 +205,4 @@
		let input = make_input(case.input_len);
		let (encoded, hash) = abao::encode::encode(&input);
		let (outboard, outboard_hash) = abao::encode::outboard(&input);
		let (encoded, hash) = bao::encode::encode(&input);
		let (outboard, outboard_hash) = bao::encode::outboard(&input);
		assert_eq!(hash, outboard_hash);
		@@ -217,3 +218,3 @@
		// Test seeking in the combined mode.
		let mut combined_reader = abao::decode::Decoder::new(Cursor::new(&encoded), &hash);
		let mut combined_reader = bao::decode::Decoder::new(Cursor::new(&encoded), &hash);
		combined_reader
		@@ -227,3 +228,3 @@ .seek(io::SeekFrom::Start(seek as u64))
		// Test seeking in the outboard mode.
		let mut outboard_reader = abao::decode::Decoder::new_outboard(
		let mut outboard_reader = bao::decode::Decoder::new_outboard(
		Cursor::new(&input),
		@@ -251,5 +252,5 @@ Cursor::new(&outboard),
		}
		let mut combined_reader = abao::decode::Decoder::new(Cursor::new(&encoded), &hash);
		let mut combined_reader = bao::decode::Decoder::new(Cursor::new(&encoded), &hash);
		let mut outboard_reader =
		abao::decode::Decoder::new_outboard(Cursor::new(&input), Cursor::new(&outboard), &hash);
		bao::decode::Decoder::new_outboard(Cursor::new(&input), Cursor::new(&outboard), &hash);
		println!();
		@@ -284,3 +285,3 @@ for &seek in &repeated_seeks {
		fn decode_slice(slice: &[u8], hash: &Hash, start: u64, len: u64) -> io::Result<Vec<u8>> {
		let mut reader = abao::decode::SliceDecoder::new(slice, hash, start, len);
		let mut reader = bao::decode::SliceDecoder::new(slice, hash, start, len);
		let mut output = Vec::new();
		@@ -296,4 +297,4 @@ reader.read_to_end(&mut output)?;
		let input = make_input(case.input_len);
		let (encoded, hash) = abao::encode::encode(&input);
		let (outboard, outboard_hash) = abao::encode::outboard(&input);
		let (encoded, hash) = bao::encode::encode(&input);
		let (outboard, outboard_hash) = bao::encode::outboard(&input);
		assert_eq!(hash, outboard_hash);
		@@ -309,3 +310,3 @@
		let mut combined_extractor =
		abao::encode::SliceExtractor::new(Cursor::new(&encoded), slice.start, slice.len);
		bao::encode::SliceExtractor::new(Cursor::new(&encoded), slice.start, slice.len);
		let mut combined_slice = Vec::new();
		@@ -320,3 +321,3 @@ combined_extractor.read_to_end(&mut combined_slice).unwrap();
		// Make sure slicing the outboard encoding also gives the right output.
		let mut outboard_extractor = abao::encode::SliceExtractor::new_outboard(
		let mut outboard_extractor = bao::encode::SliceExtractor::new_outboard(
		Cursor::new(&input),
		@@ -323,0 +324,0 @@ Cursor::new(&outboard),

Cargo.toml.orig

Sorry, the diff of this file is not supported yet

src/decode.rs

Sorry, the diff of this file is too big to display

tests/test_vectors.json

Sorry, the diff of this file is too big to display

abao - cargo Package Compare versions