| { | ||
| "git": { | ||
| "sha1": "55cd12ebb25c6261492e1e3dfa2e6453c54dde31" | ||
| "sha1": "6bd4893cc44c2ca2718de47a119a31cc40045fe5" | ||
| }, | ||
| "path_in_vcs": "utils/tinystr" | ||
| } |
@@ -34,3 +34,3 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| fn construct_from_bytes(c: &mut Criterion) { | ||
| fn construct_from_utf8(c: &mut Criterion) { | ||
| macro_rules! cfu { | ||
@@ -42,3 +42,3 @@ ($r:ty, $inputs:expr) => { | ||
| for u in &raw { | ||
| let _ = black_box(<$r>::from_bytes(*u).unwrap()); | ||
| let _ = black_box(<$r>::try_from_utf8(*u).unwrap()); | ||
| } | ||
@@ -50,3 +50,3 @@ }) | ||
| let mut group4 = c.benchmark_group("construct_from_bytes/4"); | ||
| let mut group4 = c.benchmark_group("construct_from_utf8/4"); | ||
| group4.bench_function("TinyAsciiStr<4>", cfu!(TinyAsciiStr<4>, STRINGS_4)); | ||
@@ -57,3 +57,3 @@ group4.bench_function("TinyAsciiStr<8>", cfu!(TinyAsciiStr<8>, STRINGS_4)); | ||
| let mut group8 = c.benchmark_group("construct_from_bytes/8"); | ||
| let mut group8 = c.benchmark_group("construct_from_utf8/8"); | ||
| group8.bench_function("TinyAsciiStr<8>", cfu!(TinyAsciiStr<8>, STRINGS_8)); | ||
@@ -63,3 +63,3 @@ group8.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_8)); | ||
| let mut group16 = c.benchmark_group("construct_from_bytes/16"); | ||
| let mut group16 = c.benchmark_group("construct_from_utf8/16"); | ||
| group16.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_16)); | ||
@@ -69,3 +69,3 @@ group16.finish(); | ||
| criterion_group!(benches, construct_from_str, construct_from_bytes,); | ||
| criterion_group!(benches, construct_from_str, construct_from_utf8,); | ||
| criterion_main!(benches); |
+45
-7
@@ -18,15 +18,21 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| g.bench_function("construct/TinyAsciiStr", |b| { | ||
| g.bench_function("construct/utf8/TinyAsciiStr", |b| { | ||
| b.iter(|| { | ||
| for s in STRINGS_4 { | ||
| let _: TinyAsciiStr<4> = black_box(s).parse().unwrap(); | ||
| let _: TinyAsciiStr<8> = black_box(s).parse().unwrap(); | ||
| let _: TinyAsciiStr<16> = black_box(s).parse().unwrap(); | ||
| let _: TinyAsciiStr<4> = | ||
| TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); | ||
| let _: TinyAsciiStr<8> = | ||
| TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); | ||
| let _: TinyAsciiStr<16> = | ||
| TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); | ||
| } | ||
| for s in STRINGS_8 { | ||
| let _: TinyAsciiStr<8> = black_box(s).parse().unwrap(); | ||
| let _: TinyAsciiStr<16> = black_box(s).parse().unwrap(); | ||
| let _: TinyAsciiStr<8> = | ||
| TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); | ||
| let _: TinyAsciiStr<16> = | ||
| TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); | ||
| } | ||
| for s in STRINGS_16 { | ||
| let _: TinyAsciiStr<16> = black_box(s).parse().unwrap(); | ||
| let _: TinyAsciiStr<16> = | ||
| TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); | ||
| } | ||
@@ -36,2 +42,34 @@ }); | ||
| let strings_4_utf16: Vec<Vec<u16>> = STRINGS_4 | ||
| .iter() | ||
| .map(|s| s.encode_utf16().collect()) | ||
| .collect(); | ||
| let strings_8_utf16: Vec<Vec<u16>> = STRINGS_8 | ||
| .iter() | ||
| .map(|s| s.encode_utf16().collect()) | ||
| .collect(); | ||
| let strings_16_utf16: Vec<Vec<u16>> = STRINGS_16 | ||
| .iter() | ||
| .map(|s| s.encode_utf16().collect()) | ||
| .collect(); | ||
| g.bench_function("construct/utf16/TinyAsciiStr", |b| { | ||
| b.iter(|| { | ||
| for s in strings_4_utf16.iter() { | ||
| let _: TinyAsciiStr<4> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); | ||
| let _: TinyAsciiStr<8> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); | ||
| let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); | ||
| } | ||
| for s in strings_8_utf16.iter() { | ||
| let _: TinyAsciiStr<8> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); | ||
| let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); | ||
| } | ||
| for s in strings_16_utf16.iter() { | ||
| let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); | ||
| } | ||
| }); | ||
| }); | ||
| let parsed_ascii_4: Vec<TinyAsciiStr<4>> = STRINGS_4 | ||
@@ -38,0 +76,0 @@ .iter() |
+21
-8
@@ -14,6 +14,7 @@ # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO | ||
| edition = "2021" | ||
| rust-version = "1.67" | ||
| rust-version = "1.71.1" | ||
| name = "tinystr" | ||
| version = "0.7.6" | ||
| version = "0.8.0" | ||
| authors = ["The ICU4X Project Developers"] | ||
| build = false | ||
| include = [ | ||
@@ -29,2 +30,6 @@ "data/**/*", | ||
| ] | ||
| autobins = false | ||
| autoexamples = false | ||
| autotests = false | ||
| autobenches = false | ||
| description = "A small ASCII-only bounded length string representation." | ||
@@ -53,2 +58,4 @@ readme = "README.md" | ||
| [lib] | ||
| name = "tinystr" | ||
| path = "src/lib.rs" | ||
| bench = false | ||
@@ -58,15 +65,19 @@ | ||
| name = "serde" | ||
| path = "tests/serde.rs" | ||
| required-features = ["serde"] | ||
| [[bench]] | ||
| name = "overview" | ||
| name = "construct" | ||
| path = "benches/construct.rs" | ||
| harness = false | ||
| required-features = ["bench"] | ||
| [[bench]] | ||
| name = "construct" | ||
| name = "overview" | ||
| path = "benches/overview.rs" | ||
| harness = false | ||
| required-features = ["bench"] | ||
| [[bench]] | ||
| name = "read" | ||
| path = "benches/read.rs" | ||
| harness = false | ||
@@ -77,2 +88,3 @@ required-features = ["bench"] | ||
| name = "serde" | ||
| path = "benches/serde.rs" | ||
| harness = false | ||
@@ -85,3 +97,3 @@ required-features = [ | ||
| [dependencies.databake] | ||
| version = "0.1.8" | ||
| version = "0.2.0" | ||
| optional = true | ||
@@ -101,3 +113,3 @@ default-features = false | ||
| [dependencies.zerovec] | ||
| version = "0.10.2" | ||
| version = "0.11.0" | ||
| optional = true | ||
@@ -116,2 +128,3 @@ default-features = false | ||
| version = "0.8" | ||
| features = ["small_rng"] | ||
@@ -128,3 +141,3 @@ [dev-dependencies.serde_json] | ||
| [target."cfg(not(target_arch = \"wasm32\"))".dev-dependencies.criterion] | ||
| [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies.criterion] | ||
| version = "0.5.0" |
+193
-59
@@ -7,3 +7,3 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| use crate::int_ops::{Aligned4, Aligned8}; | ||
| use crate::TinyStrError; | ||
| use crate::ParseError; | ||
| use core::fmt; | ||
@@ -20,20 +20,37 @@ use core::ops::Deref; | ||
| impl<const N: usize> TinyAsciiStr<N> { | ||
| /// Creates a `TinyAsciiStr<N>` from the given byte slice. | ||
| /// `bytes` may contain at most `N` non-null ASCII bytes. | ||
| pub const fn from_bytes(bytes: &[u8]) -> Result<Self, TinyStrError> { | ||
| Self::from_bytes_inner(bytes, 0, bytes.len(), false) | ||
| #[inline] | ||
| pub const fn try_from_str(s: &str) -> Result<Self, ParseError> { | ||
| Self::try_from_utf8(s.as_bytes()) | ||
| } | ||
| /// Creates a `TinyAsciiStr<N>` from a byte slice, replacing invalid bytes. | ||
| /// Creates a `TinyAsciiStr<N>` from the given UTF-8 slice. | ||
| /// `code_units` may contain at most `N` non-null ASCII code points. | ||
| #[inline] | ||
| pub const fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> { | ||
| Self::try_from_utf8_inner(code_units, false) | ||
| } | ||
| /// Creates a `TinyAsciiStr<N>` from the given UTF-16 slice. | ||
| /// `code_units` may contain at most `N` non-null ASCII code points. | ||
| #[inline] | ||
| pub const fn try_from_utf16(code_units: &[u16]) -> Result<Self, ParseError> { | ||
| Self::try_from_utf16_inner(code_units, 0, code_units.len(), false) | ||
| } | ||
| /// Creates a `TinyAsciiStr<N>` from a UTF-8 slice, replacing invalid code units. | ||
| /// | ||
| /// Null and non-ASCII bytes (i.e. those outside the range `0x01..=0x7F`) | ||
| /// will be replaced with the '?' character. | ||
| /// Invalid code units, as well as null or non-ASCII code points | ||
| /// (i.e. those outside the range `U+0001..=U+007F`) | ||
| /// will be replaced with the replacement byte. | ||
| /// | ||
| /// The input slice will be truncated if its length exceeds `N`. | ||
| pub const fn from_bytes_lossy(bytes: &[u8]) -> Self { | ||
| const QUESTION: u8 = b'?'; | ||
| pub const fn from_utf8_lossy(code_units: &[u8], replacement: u8) -> Self { | ||
| let mut out = [0; N]; | ||
| let mut i = 0; | ||
| // Ord is not available in const, so no `.min(N)` | ||
| let len = if bytes.len() > N { N } else { bytes.len() }; | ||
| let len = if code_units.len() > N { | ||
| N | ||
| } else { | ||
| code_units.len() | ||
| }; | ||
@@ -43,7 +60,7 @@ // Indexing is protected by the len check above | ||
| while i < len { | ||
| let b = bytes[i]; | ||
| let b = code_units[i]; | ||
| if b > 0 && b < 0x80 { | ||
| out[i] = b; | ||
| } else { | ||
| out[i] = QUESTION; | ||
| out[i] = replacement; | ||
| } | ||
@@ -59,2 +76,37 @@ i += 1; | ||
| /// Creates a `TinyAsciiStr<N>` from a UTF-16 slice, replacing invalid code units. | ||
| /// | ||
| /// Invalid code units, as well as null or non-ASCII code points | ||
| /// (i.e. those outside the range `U+0001..=U+007F`) | ||
| /// will be replaced with the replacement byte. | ||
| /// | ||
| /// The input slice will be truncated if its length exceeds `N`. | ||
| pub const fn from_utf16_lossy(code_units: &[u16], replacement: u8) -> Self { | ||
| let mut out = [0; N]; | ||
| let mut i = 0; | ||
| // Ord is not available in const, so no `.min(N)` | ||
| let len = if code_units.len() > N { | ||
| N | ||
| } else { | ||
| code_units.len() | ||
| }; | ||
| // Indexing is protected by the len check above | ||
| #[allow(clippy::indexing_slicing)] | ||
| while i < len { | ||
| let b = code_units[i]; | ||
| if b > 0 && b < 0x80 { | ||
| out[i] = b as u8; | ||
| } else { | ||
| out[i] = replacement; | ||
| } | ||
| i += 1; | ||
| } | ||
| Self { | ||
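| // SAFETY: `out` has same size as `self.bytes`, and only contains ASCII bytes | ||
| // as long as `replacement` is itself an ASCII byte (it is written unchecked above). | ||
| // NOTE(review): confirm that callers always pass an ASCII `replacement`. | ||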
| bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, | ||
| } | ||
| } | ||
| /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`. | ||
@@ -80,26 +132,58 @@ /// | ||
| /// ``` | ||
| pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, TinyStrError> { | ||
| Self::from_bytes_inner(&raw, 0, N, true) | ||
| pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, ParseError> { | ||
| Self::try_from_utf8_inner(&raw, true) | ||
| } | ||
| /// Equivalent to [`from_bytes(bytes[start..end])`](Self::from_bytes), | ||
| /// but callable in a `const` context (which range indexing is not). | ||
| pub const fn from_bytes_manual_slice( | ||
| bytes: &[u8], | ||
| start: usize, | ||
| end: usize, | ||
| ) -> Result<Self, TinyStrError> { | ||
| Self::from_bytes_inner(bytes, start, end, false) | ||
| pub(crate) const fn try_from_utf8_inner( | ||
| code_units: &[u8], | ||
| allow_trailing_null: bool, | ||
| ) -> Result<Self, ParseError> { | ||
| if code_units.len() > N { | ||
| return Err(ParseError::TooLong { | ||
| max: N, | ||
| len: code_units.len(), | ||
| }); | ||
| } | ||
| let mut out = [0; N]; | ||
| let mut i = 0; | ||
| let mut found_null = false; | ||
| // Indexing is protected by the ParseError::TooLong length check above | ||
| #[allow(clippy::indexing_slicing)] | ||
| while i < code_units.len() { | ||
| let b = code_units[i]; | ||
| if b == 0 { | ||
| found_null = true; | ||
| } else if b >= 0x80 { | ||
| return Err(ParseError::NonAscii); | ||
| } else if found_null { | ||
| // Error if there are contentful bytes after null | ||
| return Err(ParseError::ContainsNull); | ||
| } | ||
| out[i] = b; | ||
| i += 1; | ||
| } | ||
| if !allow_trailing_null && found_null { | ||
| // We found some trailing nulls, error | ||
| return Err(ParseError::ContainsNull); | ||
| } | ||
| Ok(Self { | ||
| // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` | ||
| bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, | ||
| }) | ||
| } | ||
| #[inline] | ||
| pub(crate) const fn from_bytes_inner( | ||
| bytes: &[u8], | ||
| pub(crate) const fn try_from_utf16_inner( | ||
| code_units: &[u16], | ||
| start: usize, | ||
| end: usize, | ||
| allow_trailing_null: bool, | ||
| ) -> Result<Self, TinyStrError> { | ||
| ) -> Result<Self, ParseError> { | ||
| let len = end - start; | ||
| if len > N { | ||
| return Err(TinyStrError::TooLarge { max: N, len }); | ||
| return Err(ParseError::TooLong { max: N, len }); | ||
| } | ||
@@ -113,3 +197,3 @@ | ||
| while i < len { | ||
| let b = bytes[start + i]; | ||
| let b = code_units[start + i]; | ||
@@ -119,8 +203,8 @@ if b == 0 { | ||
| } else if b >= 0x80 { | ||
| return Err(TinyStrError::NonAscii); | ||
| return Err(ParseError::NonAscii); | ||
| } else if found_null { | ||
| // Error if there are contentful bytes after null | ||
| return Err(TinyStrError::ContainsNull); | ||
| return Err(ParseError::ContainsNull); | ||
| } | ||
| out[i] = b; | ||
| out[i] = b as u8; | ||
@@ -132,3 +216,3 @@ i += 1; | ||
| // We found some trailing nulls, error | ||
| return Err(TinyStrError::ContainsNull); | ||
| return Err(ParseError::ContainsNull); | ||
| } | ||
@@ -142,12 +226,6 @@ | ||
| // TODO: This function shadows the FromStr trait. Rename? | ||
| #[inline] | ||
| pub const fn from_str(s: &str) -> Result<Self, TinyStrError> { | ||
| Self::from_bytes_inner(s.as_bytes(), 0, s.len(), false) | ||
| } | ||
| #[inline] | ||
| pub const fn as_str(&self) -> &str { | ||
| // as_bytes is valid utf8 | ||
| unsafe { str::from_utf8_unchecked(self.as_bytes()) } | ||
| // as_utf8 is valid utf8 | ||
| unsafe { str::from_utf8_unchecked(self.as_utf8()) } | ||
| } | ||
@@ -180,3 +258,3 @@ | ||
| #[must_use] | ||
| pub const fn as_bytes(&self) -> &[u8] { | ||
| pub const fn as_utf8(&self) -> &[u8] { | ||
| // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`, | ||
@@ -213,5 +291,48 @@ // and changing the length of that slice to self.len() < N is safe. | ||
| // ASCII characters, so this also holds for `bytes`. | ||
| unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) } | ||
| unsafe { TinyAsciiStr::from_utf8_unchecked(bytes) } | ||
| } | ||
| #[inline] | ||
| #[must_use] | ||
| /// Returns a `TinyAsciiStr<Q>` with the concatenation of this string, | ||
| /// `TinyAsciiStr<N>`, and another string, `TinyAsciiStr<M>`. | ||
| /// | ||
| /// If `Q < N + M`, the string gets truncated. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use tinystr::tinystr; | ||
| /// use tinystr::TinyAsciiStr; | ||
| /// | ||
| /// let abc = tinystr!(6, "abc"); | ||
| /// let defg = tinystr!(6, "defg"); | ||
| /// | ||
| /// // The concatenation is successful if Q is large enough... | ||
| /// assert_eq!(abc.concat(defg), tinystr!(16, "abcdefg")); | ||
| /// assert_eq!(abc.concat(defg), tinystr!(12, "abcdefg")); | ||
| /// assert_eq!(abc.concat(defg), tinystr!(8, "abcdefg")); | ||
| /// assert_eq!(abc.concat(defg), tinystr!(7, "abcdefg")); | ||
| /// | ||
| /// // ...but it truncates if Q is too small. | ||
| /// assert_eq!(abc.concat(defg), tinystr!(6, "abcdef")); | ||
| /// assert_eq!(abc.concat(defg), tinystr!(2, "ab")); | ||
| /// ``` | ||
| pub const fn concat<const M: usize, const Q: usize>( | ||
| self, | ||
| other: TinyAsciiStr<M>, | ||
| ) -> TinyAsciiStr<Q> { | ||
| let mut result = self.resize::<Q>(); | ||
| let mut i = self.len(); | ||
| let mut j = 0; | ||
| // Indexing is protected by the loop guard | ||
| #[allow(clippy::indexing_slicing)] | ||
| while i < Q && j < M { | ||
| result.bytes[i] = other.bytes[j]; | ||
| i += 1; | ||
| j += 1; | ||
| } | ||
| result | ||
| } | ||
| /// # Safety | ||
@@ -221,5 +342,5 @@ /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes | ||
| #[must_use] | ||
| pub const unsafe fn from_bytes_unchecked(bytes: [u8; N]) -> Self { | ||
| pub const unsafe fn from_utf8_unchecked(code_units: [u8; N]) -> Self { | ||
| Self { | ||
| bytes: AsciiByte::to_ascii_byte_array(&bytes), | ||
| bytes: AsciiByte::to_ascii_byte_array(&code_units), | ||
| } | ||
@@ -669,6 +790,6 @@ } | ||
| impl<const N: usize> FromStr for TinyAsciiStr<N> { | ||
| type Err = TinyStrError; | ||
| type Err = ParseError; | ||
| #[inline] | ||
| fn from_str(s: &str) -> Result<Self, Self::Err> { | ||
| Self::from_str(s) | ||
| Self::try_from_str(s) | ||
| } | ||
@@ -773,3 +894,3 @@ } | ||
| Ok(t) => t, | ||
| Err(TinyStrError::TooLarge { .. }) => continue, | ||
| Err(ParseError::TooLong { .. }) => continue, | ||
| Err(e) => panic!("{}", e), | ||
@@ -780,2 +901,12 @@ }; | ||
| assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}"); | ||
| let s_utf16: Vec<u16> = s.encode_utf16().collect(); | ||
| let t = match TinyAsciiStr::<N>::try_from_utf16(&s_utf16) { | ||
| Ok(t) => t, | ||
| Err(ParseError::TooLong { .. }) => continue, | ||
| Err(e) => panic!("{}", e), | ||
| }; | ||
| let expected = reference_f(&s); | ||
| let actual = tinystr_f(t); | ||
| assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}"); | ||
| } | ||
@@ -837,3 +968,3 @@ } | ||
| |s| { | ||
| s == TinyAsciiStr::<16>::from_str(s) | ||
| s == TinyAsciiStr::<16>::try_from_str(s) | ||
| .unwrap() | ||
@@ -859,3 +990,3 @@ .to_ascii_lowercase() | ||
| |s| { | ||
| s == TinyAsciiStr::<16>::from_str(s) | ||
| s == TinyAsciiStr::<16>::try_from_str(s) | ||
| .unwrap() | ||
@@ -881,3 +1012,3 @@ .to_ascii_titlecase() | ||
| |s| { | ||
| s == TinyAsciiStr::<16>::from_str(s) | ||
| s == TinyAsciiStr::<16>::try_from_str(s) | ||
| .unwrap() | ||
@@ -906,3 +1037,3 @@ .to_ascii_uppercase() | ||
| // Check lowercase | ||
| s == TinyAsciiStr::<16>::from_str(s) | ||
| s == TinyAsciiStr::<16>::try_from_str(s) | ||
| .unwrap() | ||
@@ -931,3 +1062,3 @@ .to_ascii_lowercase() | ||
| // Check titlecase | ||
| s == TinyAsciiStr::<16>::from_str(s) | ||
| s == TinyAsciiStr::<16>::try_from_str(s) | ||
| .unwrap() | ||
@@ -956,3 +1087,3 @@ .to_ascii_titlecase() | ||
| // Check uppercase | ||
| s == TinyAsciiStr::<16>::from_str(s) | ||
| s == TinyAsciiStr::<16>::try_from_str(s) | ||
| .unwrap() | ||
@@ -1039,14 +1170,17 @@ .to_ascii_uppercase() | ||
| fn lossy_constructor() { | ||
| assert_eq!(TinyAsciiStr::<4>::from_bytes_lossy(b"").as_str(), ""); | ||
| assert_eq!(TinyAsciiStr::<4>::from_utf8_lossy(b"", b'?').as_str(), ""); | ||
| assert_eq!( | ||
| TinyAsciiStr::<4>::from_bytes_lossy(b"oh\0o").as_str(), | ||
| TinyAsciiStr::<4>::from_utf8_lossy(b"oh\0o", b'?').as_str(), | ||
| "oh?o" | ||
| ); | ||
| assert_eq!(TinyAsciiStr::<4>::from_bytes_lossy(b"\0").as_str(), "?"); | ||
| assert_eq!( | ||
| TinyAsciiStr::<4>::from_bytes_lossy(b"toolong").as_str(), | ||
| TinyAsciiStr::<4>::from_utf8_lossy(b"\0", b'?').as_str(), | ||
| "?" | ||
| ); | ||
| assert_eq!( | ||
| TinyAsciiStr::<4>::from_utf8_lossy(b"toolong", b'?').as_str(), | ||
| "tool" | ||
| ); | ||
| assert_eq!( | ||
| TinyAsciiStr::<4>::from_bytes_lossy(&[b'a', 0x80, 0xFF, b'1']).as_str(), | ||
| TinyAsciiStr::<4>::from_utf8_lossy(&[b'a', 0x80, 0xFF, b'1'], b'?').as_str(), | ||
| "a??1" | ||
@@ -1053,0 +1187,0 @@ ); |
+31
-4
@@ -19,2 +19,8 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| impl<const N: usize> BakeSize for TinyAsciiStr<N> { | ||
| fn borrows_size(&self) -> usize { | ||
| 0 | ||
| } | ||
| } | ||
| impl<const N: usize> databake::Bake for UnvalidatedTinyAsciiStr<N> { | ||
@@ -33,3 +39,3 @@ fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream { | ||
| databake::quote! { | ||
| tinystr::UnvalidatedTinyAsciiStr::from_bytes_unchecked(*#bytes) | ||
| tinystr::UnvalidatedTinyAsciiStr::from_utf8_unchecked(#bytes) | ||
| } | ||
@@ -41,5 +47,16 @@ } | ||
| impl<const N: usize> databake::BakeSize for UnvalidatedTinyAsciiStr<N> { | ||
| fn borrows_size(&self) -> usize { | ||
| 0 | ||
| } | ||
| } | ||
| #[test] | ||
| fn test() { | ||
| test_bake!(TinyAsciiStr<10>, const: crate::tinystr!(10usize, "foo"), tinystr); | ||
| test_bake!( | ||
| TinyAsciiStr<10>, | ||
| const, | ||
| crate::tinystr!(10usize, "foo"), | ||
| tinystr | ||
| ); | ||
| } | ||
@@ -49,4 +66,14 @@ | ||
| fn test_unvalidated() { | ||
| test_bake!(UnvalidatedTinyAsciiStr<10>, const: crate::tinystr!(10usize, "foo").to_unvalidated(), tinystr); | ||
| test_bake!(UnvalidatedTinyAsciiStr<3>, const: crate::UnvalidatedTinyAsciiStr::from_bytes_unchecked(*b"AB\xCD"), tinystr); | ||
| test_bake!( | ||
| UnvalidatedTinyAsciiStr<10>, | ||
| const, | ||
| crate::tinystr!(10usize, "foo").to_unvalidated(), | ||
| tinystr | ||
| ); | ||
| test_bake!( | ||
| UnvalidatedTinyAsciiStr<3>, | ||
| const, | ||
| crate::UnvalidatedTinyAsciiStr::from_utf8_unchecked(*b"AB\xCD"), | ||
| tinystr | ||
| ); | ||
| } |
+4
-4
@@ -8,13 +8,13 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| #[cfg(feature = "std")] | ||
| impl std::error::Error for TinyStrError {} | ||
| impl std::error::Error for ParseError {} | ||
| #[derive(Display, Debug, PartialEq, Eq)] | ||
| #[non_exhaustive] | ||
| pub enum TinyStrError { | ||
| pub enum ParseError { | ||
| #[displaydoc("found string of larger length {len} when constructing string of length {max}")] | ||
| TooLarge { max: usize, len: usize }, | ||
| TooLong { max: usize, len: usize }, | ||
| #[displaydoc("tinystr types do not support strings with null bytes")] | ||
| ContainsNull, | ||
| #[displaydoc("attempted to construct TinyStrAuto from a non-ascii string")] | ||
| #[displaydoc("attempted to construct TinyAsciiStr from a non-ASCII string")] | ||
| NonAscii, | ||
| } |
+4
-4
@@ -16,3 +16,3 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| #[inline] | ||
| pub const fn from_bytes<const N: usize>(src: &[u8; N]) -> Self { | ||
| pub const fn from_utf8<const N: usize>(src: &[u8; N]) -> Self { | ||
| let mut bytes = [0; 4]; | ||
@@ -31,3 +31,3 @@ let mut i = 0; | ||
| pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self { | ||
| Self::from_bytes::<N>(unsafe { core::mem::transmute::<&[AsciiByte; N], &[u8; N]>(src) }) | ||
| Self::from_utf8::<N>(unsafe { core::mem::transmute::<&[AsciiByte; N], &[u8; N]>(src) }) | ||
| } | ||
@@ -175,3 +175,3 @@ | ||
| #[inline] | ||
| pub const fn from_bytes<const N: usize>(src: &[u8; N]) -> Self { | ||
| pub const fn from_utf8<const N: usize>(src: &[u8; N]) -> Self { | ||
| let mut bytes = [0; 8]; | ||
@@ -190,3 +190,3 @@ let mut i = 0; | ||
| pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self { | ||
| Self::from_bytes::<N>(unsafe { core::mem::transmute::<&[AsciiByte; N], &[u8; N]>(src) }) | ||
| Self::from_utf8::<N>(unsafe { core::mem::transmute::<&[AsciiByte; N], &[u8; N]>(src) }) | ||
| } | ||
@@ -193,0 +193,0 @@ |
+1
-6
@@ -90,3 +90,3 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| pub use ascii::TinyAsciiStr; | ||
| pub use error::TinyStrError; | ||
| pub use error::ParseError; | ||
| pub use unvalidated::UnvalidatedTinyAsciiStr; | ||
@@ -115,6 +115,1 @@ | ||
| } | ||
| // /// Allows unit tests to use the macro | ||
| // #[cfg(test)] | ||
| // mod tinystr { | ||
| // pub use super::{TinyAsciiStr, TinyStrError}; | ||
| // } |
+1
-1
@@ -10,3 +10,3 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| const TINYSTR_MACRO_CONST: $crate::TinyAsciiStr<$n> = { | ||
| match $crate::TinyAsciiStr::from_bytes($s.as_bytes()) { | ||
| match $crate::TinyAsciiStr::try_from_utf8($s.as_bytes()) { | ||
| Ok(s) => s, | ||
@@ -13,0 +13,0 @@ // We are okay with panicking here because this is in a const context |
+2
-2
@@ -75,3 +75,3 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| Ok(unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) }) | ||
| Ok(unsafe { TinyAsciiStr::from_utf8_unchecked(bytes) }) | ||
| } | ||
@@ -87,3 +87,3 @@ } | ||
| let x: Cow<'de, str> = Deserialize::deserialize(deserializer)?; | ||
| TinyAsciiStr::from_str(&x).map_err(|e| Error::custom(e.to_string())) | ||
| TinyAsciiStr::try_from_str(&x).map_err(|e| Error::custom(e.to_string())) | ||
| } else { | ||
@@ -90,0 +90,0 @@ deserializer.deserialize_tuple(N, TinyAsciiStrVisitor::<N>::new()) |
+16
-11
@@ -15,4 +15,4 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) | ||
| // 3. The impl of validate_byte_slice() returns an error if any byte is not valid. | ||
| // 4. The impl of validate_byte_slice() returns an error if there are extra bytes. | ||
| // 3. The impl of validate_bytes() returns an error if any byte is not valid. | ||
| // 4. The impl of validate_bytes() returns an error if there are extra bytes. | ||
| // 5. The other ULE methods use the default impl. | ||
@@ -22,10 +22,10 @@ // 6. TinyAsciiStr byte equality is semantic equality | ||
| #[inline] | ||
| fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { | ||
| fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { | ||
| if bytes.len() % N != 0 { | ||
| return Err(ZeroVecError::length::<Self>(bytes.len())); | ||
| return Err(UleError::length::<Self>(bytes.len())); | ||
| } | ||
| // Validate the bytes | ||
| for chunk in bytes.chunks_exact(N) { | ||
| let _ = TinyAsciiStr::<N>::from_bytes_inner(chunk, 0, N, true) | ||
| .map_err(|_| ZeroVecError::parse::<Self>())?; | ||
| let _ = TinyAsciiStr::<N>::try_from_utf8_inner(chunk, true) | ||
| .map_err(|_| UleError::parse::<Self>())?; | ||
| } | ||
@@ -36,2 +36,7 @@ Ok(()) | ||
| impl<const N: usize> NicheBytes<N> for TinyAsciiStr<N> { | ||
| // AsciiByte is 0..128 | ||
| const NICHE_BIT_PATTERN: [u8; N] = [255; N]; | ||
| } | ||
| impl<const N: usize> AsULE for TinyAsciiStr<N> { | ||
@@ -63,4 +68,4 @@ type ULE = Self; | ||
| // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) | ||
| // 3. The impl of validate_byte_slice() returns an error if any byte is not valid. | ||
| // 4. The impl of validate_byte_slice() returns an error if there are extra bytes. | ||
| // 3. The impl of validate_bytes() returns an error if any byte is not valid. | ||
| // 4. The impl of validate_bytes() returns an error if there are extra bytes. | ||
| // 5. The other ULE methods use the default impl. | ||
@@ -70,5 +75,5 @@ // 6. UnvalidatedTinyAsciiStr byte equality is semantic equality | ||
| #[inline] | ||
| fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { | ||
| fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { | ||
| if bytes.len() % N != 0 { | ||
| return Err(ZeroVecError::length::<Self>(bytes.len())); | ||
| return Err(UleError::length::<Self>(bytes.len())); | ||
| } | ||
@@ -115,3 +120,3 @@ Ok(()) | ||
| let vec: ZeroVec<TinyAsciiStr<7>> = ZeroVec::parse_byte_slice(bytes).unwrap(); | ||
| let vec: ZeroVec<TinyAsciiStr<7>> = ZeroVec::parse_bytes(bytes).unwrap(); | ||
@@ -118,0 +123,0 @@ assert_eq!(&*vec.get(0).unwrap(), "foobar"); |
@@ -5,4 +5,4 @@ // This file is part of ICU4X. For terms of use, please see the file | ||
| use crate::ParseError; | ||
| use crate::TinyAsciiStr; | ||
| use crate::TinyStrError; | ||
| use core::fmt; | ||
@@ -32,9 +32,10 @@ | ||
| #[inline] | ||
| // Converts into a [`TinyAsciiStr`]. Fails if the bytes are not valid ASCII. | ||
| pub fn try_into_tinystr(&self) -> Result<TinyAsciiStr<N>, TinyStrError> { | ||
| /// Converts into a [`TinyAsciiStr`]. Fails if the bytes are not valid ASCII. | ||
| pub fn try_into_tinystr(self) -> Result<TinyAsciiStr<N>, ParseError> { | ||
| TinyAsciiStr::try_from_raw(self.0) | ||
| } | ||
| #[doc(hidden)] | ||
| pub const fn from_bytes_unchecked(bytes: [u8; N]) -> Self { | ||
| #[inline] | ||
| /// Wraps the given bytes in an [`UnvalidatedTinyAsciiStr`] without validating them. | ||
| pub const fn from_utf8_unchecked(bytes: [u8; N]) -> Self { | ||
| Self(bytes) | ||
@@ -41,0 +42,0 @@ } |
Sorry, the diff of this file is not supported yet