diced
Advanced tools
+13
-1
@@ -9,5 +9,17 @@ # Changelog | ||
| ## [Unreleased] | ||
| [Unreleased]: https://github.com/althonos/diced/compare/v0.1.0...HEAD | ||
| [Unreleased]: https://github.com/althonos/diced/compare/v0.1.1...HEAD | ||
| ## [v0.1.1] - 2024-06-19 | ||
| [v0.1.1]: https://github.com/althonos/diced/compare/v0.1.0...v0.1.1 | ||
| ### Changed | ||
| - Use raw sequence bytes in `Scanner` to avoid panics when slicing sequences that contain multi-byte Unicode characters. | ||
| ### Fixed | ||
| - Incorrect metadata in Python package and documentation. | ||
| - Missing test data in Python wheels. | ||
| - Indexing and underflow errors found by the `afl` fuzzer. | ||
| ## [v0.1.0] - 2024-06-11 | ||
@@ -14,0 +26,0 @@ [Unreleased]: https://github.com/althonos/diced/compare/11ad0d3...v0.1.0 |
| [package] | ||
| name = "diced-py" | ||
| version = "0.1.0" | ||
| version = "0.1.1" | ||
| authors = ["Martin Larralde <martin.larralde@embl.de>"] | ||
@@ -21,3 +21,3 @@ edition = "2021" | ||
| path = "../diced" | ||
| version = "0.1.0" | ||
| version = "0.1.1" | ||
@@ -24,0 +24,0 @@ [dependencies] |
| Metadata-Version: 2.1 | ||
| Name: diced | ||
| Version: 0.1.0 | ||
| Version: 0.1.1 | ||
| Summary: Rust re-implementation of the MinCED algorithm to Detect Instances of CRISPRs in Environmental Data. | ||
@@ -8,3 +8,3 @@ Home-page: https://github.com/althonos/diced | ||
| Author-email: martin.larralde@embl.de | ||
| License: MIT OR GPL-3.0-or-later | ||
| License: GPL-3.0-or-later | ||
| Project-URL: Bug Tracker, https://github.com/althonos/diced/issues | ||
@@ -20,3 +20,3 @@ Project-URL: Changelog, https://github.com/althonos/diced/blob/master/CHANGELOG.md | ||
| Classifier: Intended Audience :: Science/Research | ||
| Classifier: License :: OSI Approved :: MIT License | ||
| Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+) | ||
| Classifier: Operating System :: OS Independent | ||
@@ -44,3 +44,3 @@ Classifier: Programming Language :: Rust | ||
| [](https://github.com/althonos/diced/actions) | ||
| [](https://github.com/althonos/diced/actions) | ||
| [](https://codecov.io/gh/althonos/diced/) | ||
@@ -74,2 +74,4 @@ [](https://choosealicense.com/licenses/gpl-3.0/) | ||
| *This is the Python version, there is a [Rust crate](https://crates.io/crates/diced) available as well.* | ||
| ### 📋 Features | ||
@@ -76,0 +78,0 @@ |
@@ -1,2 +0,2 @@ | ||
| __version__ = "0.1.0" | ||
| __version__ = "0.1.1" | ||
@@ -3,0 +3,0 @@ from . import lib |
@@ -5,3 +5,3 @@ # 🔪🧅 Diced [](https://github.com/althonos/diced/stargazers) | ||
| [](https://github.com/althonos/diced/actions) | ||
| [](https://github.com/althonos/diced/actions) | ||
| [](https://codecov.io/gh/althonos/diced/) | ||
@@ -35,2 +35,4 @@ [](https://choosealicense.com/licenses/gpl-3.0/) | ||
| *This is the Python version, there is a [Rust crate](https://crates.io/crates/diced) available as well.* | ||
| ### 📋 Features | ||
@@ -37,0 +39,0 @@ |
+1
-1
| [package] | ||
| name = "diced" | ||
| version = "0.1.0" | ||
| version = "0.1.1" | ||
| authors = ["Martin Larralde <martin.larralde@embl.de>"] | ||
@@ -5,0 +5,0 @@ edition = "2021" |
+3
-1
@@ -5,3 +5,3 @@ # 🔪🧅 Diced [](https://github.com/althonos/diced/stargazers) | ||
| [](https://github.com/althonos/diced/actions) | ||
| [](https://github.com/althonos/diced/actions) | ||
| [](https://codecov.io/gh/althonos/diced/) | ||
@@ -31,2 +31,4 @@ [](https://choosealicense.com/licenses/gpl-3.0/) | ||
| *This is the Rust version, there is a [Python package](https://pypi.org/project/diced) available as well.* | ||
| ### 📋 Features | ||
@@ -33,0 +35,0 @@ |
+63
-15
@@ -67,3 +67,3 @@ #![doc = include_str!("../README.md")] | ||
| pub fn new(sequence: S) -> Self { | ||
| let s = sequence.as_ref(); | ||
| let s = sequence.as_ref().as_bytes(); | ||
| let mut mask = Vec::new(); | ||
@@ -79,3 +79,3 @@ | ||
| while j < s.len() && s.as_bytes()[i] == s.as_bytes()[j] { | ||
| while j < s.len() && s[i] == s[j] { | ||
| n += 1; | ||
@@ -243,4 +243,5 @@ j += 1; | ||
| fn _scan_right(&self, crispr: &mut Crispr<S>, pattern: &str, scan_range: usize) { | ||
| fn _scan_right(&self, crispr: &mut Crispr<S>, pattern: &[u8], scan_range: usize) { | ||
| let seq = crispr.sequence.as_ref(); | ||
| let bytes = seq.as_bytes(); | ||
@@ -275,6 +276,6 @@ let num_repeats = crispr.indices.len(); | ||
| let subseq = &seq[begin_search..end_search]; | ||
| let subseq = &bytes[begin_search..end_search]; | ||
| #[cfg(feature = "memchr")] | ||
| let pos = finder.find(subseq.as_bytes()); | ||
| let pos = finder.find(subseq); | ||
| #[cfg(not(feature = "memchr"))] | ||
@@ -314,3 +315,3 @@ let pos = subseq.find(pattern); | ||
| let max_right_extension_length = | ||
| shortest_repeat_spacing - self.parameters.min_spacer_length; | ||
| shortest_repeat_spacing.saturating_sub(self.parameters.min_spacer_length); | ||
@@ -339,8 +340,11 @@ while right_extension_length <= max_right_extension_length { | ||
| } | ||
| right_extension_length -= 1; | ||
| if right_extension_length > 0 { | ||
| right_extension_length -= 1; | ||
| } | ||
| char_counts.clear(); | ||
| let mut left_extension_length = 0; | ||
| let max_left_extension_length = | ||
| shortest_repeat_spacing - self.parameters.min_spacer_length - right_extension_length; | ||
| let max_left_extension_length = shortest_repeat_spacing | ||
| .saturating_sub(self.parameters.min_spacer_length) | ||
| .saturating_sub(right_extension_length); | ||
| while left_extension_length <= max_left_extension_length { | ||
@@ -367,3 +371,5 @@ if first_repeat_start_index < left_extension_length { | ||
| } | ||
| left_extension_length -= 1; | ||
| if left_extension_length > 0 { | ||
| left_extension_length -= 1; | ||
| } | ||
@@ -559,4 +565,4 @@ for index in crispr.indices.iter_mut() { | ||
| let repeat = crispr.repeat(k); | ||
| let first_char = repeat.chars().next().unwrap(); | ||
| char_counts.count(first_char); | ||
| let first_char = repeat.as_bytes().first().unwrap(); | ||
| char_counts.count(*first_char as char); | ||
| } | ||
@@ -580,2 +586,3 @@ if (char_counts.max() as f32) / (num_repeats as f32) < Self::THRESHOLD { | ||
| let seq = self.sequence.as_ref(); | ||
| let bytes = seq.as_bytes(); | ||
@@ -625,7 +632,7 @@ let skips = self | ||
| let pattern = &seq[pattern_start..pattern_end]; | ||
| let subseq = &seq[begin_search..end_search]; | ||
| let pattern = &bytes[pattern_start..pattern_end]; | ||
| let subseq = &bytes[begin_search..end_search]; | ||
| #[cfg(feature = "memchr")] | ||
| let pos = memchr::memmem::find(subseq.as_bytes(), pattern.as_bytes()); | ||
| let pos = memchr::memmem::find(subseq, pattern); | ||
| #[cfg(not(feature = "memchr"))] | ||
@@ -831,2 +838,43 @@ let pos = subseq.find(pattern); | ||
| } | ||
| #[test] | ||
| fn scan_unicode() { | ||
| const UNICODE: &'static str = concat!( | ||
| "AAAAAAAGAFCACATTGACGCGGGGGGGGCATACCAAACATAATTGACcCGGACACGCCAAGGCT", | ||
| "CACGTTAACAAAAGACACGACGCGGGACAATAGGATAAACATAATTGACTAAACGTGGGAACACG", | ||
| "CGGGCATACCAAACATAATTGACcCGGATTGACGCGGGACAATAGGATAAACATAATTGACCACC", | ||
| "CGGACACCAAAAAAAGAFCACATTGACGCGGGCATACCAgACATAαTTGACcCGGAAAAAAAAGA", | ||
| "FCACATTGACGAAAAAGA)GAFCACATSGACGGTCGTTTTCATGAACAAGTTGACcCGGACdCAA", | ||
| "CAAAGAFCACATTGACGCGGcCGGACdCAACAAAGGCATACCAAACATAAATTGGCGGGCATACC", | ||
| "AAACA" | ||
| ); | ||
| let it = ScannerBuilder::default().scan(UNICODE); | ||
| let crisprs = it.collect::<Vec<_>>(); | ||
| assert_eq!(crisprs.len(), 0); | ||
| const UNICODE2: &'static str = concat!( | ||
| "GAAJJJJGssGAGAGGTATAACCAbAACCGTTGTGTJJJJJJJJJJJJJJJJJJJGGTATAACCA", | ||
| "bAACCGTTGTGT@AGGAGAGGTATAACCAbAACCGTTGTGT@GGTJJJJJJGGTATAACCAbAAC", | ||
| "CGTATAAGAGGTATAACCAbAACCGTTGTGT@GGTATAGATCTAATGG?AATGAAGGCAATAAGG", | ||
| "T\x04\x00yN", | ||
| ); | ||
| let it2 = ScannerBuilder::default().scan(UNICODE2); | ||
| let crisprs2 = it2.collect::<Vec<_>>(); | ||
| assert_eq!(crisprs2.len(), 0); | ||
| const UNICODE3: &'static str = concat!( | ||
| "GAAJJJJJJJJJJJJJJJJJJJJJJJGGTATAACCAbAACCGTTGTGT@AGGAGAGGTATAACCAbAACC", | ||
| "GTTGTGT@GGTJJJJJJGGTATAACCAbAACCGTATAAGAGGTATAACCAAGAGGTATAACCAbAACCGT", | ||
| "TGTGGAAJJJJJJJJJJJJJJJJJCCGTTGTTGTGT@AGGAGAGGTATAACCAbAACCGTTGTGT@GGTJ", | ||
| "JJJJJGGTETAACCAbAACCGTATAAGAGGTATAACCAbAACCGAGAGGTAGGACAACTTACCTACATAA", | ||
| "CCAbAACCGTTGTGGAAJJJJJJJJJJJJJJJJJCCGTTGTTGTGT@AGGAGAGGTATAACCAbAACCGT", | ||
| "TGTGT@GAAJJJJJJJJJJJJJJJJJJJJJJJGGTATAA6CAbAACAGTTGTGT@AGGAGAGGTATAACC", | ||
| "AbAACCGTTGTGT@GGTJJJJJJGGTATAACCAGCGGTTGAAGGTGTGCAACCTCAGTCbAACCGTACGT", | ||
| ); | ||
| let it3 = ScannerBuilder::default().scan(UNICODE3); | ||
| let crisprs3 = it3.collect::<Vec<_>>(); | ||
| assert_eq!(crisprs3.len(), 1); | ||
| } | ||
| } |
+6
-4
| Metadata-Version: 2.1 | ||
| Name: diced | ||
| Version: 0.1.0 | ||
| Version: 0.1.1 | ||
| Summary: Rust re-implementation of the MinCED algorithm to Detect Instances of CRISPRs in Environmental Data. | ||
@@ -8,3 +8,3 @@ Home-page: https://github.com/althonos/diced | ||
| Author-email: martin.larralde@embl.de | ||
| License: MIT OR GPL-3.0-or-later | ||
| License: GPL-3.0-or-later | ||
| Project-URL: Bug Tracker, https://github.com/althonos/diced/issues | ||
@@ -20,3 +20,3 @@ Project-URL: Changelog, https://github.com/althonos/diced/blob/master/CHANGELOG.md | ||
| Classifier: Intended Audience :: Science/Research | ||
| Classifier: License :: OSI Approved :: MIT License | ||
| Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+) | ||
| Classifier: Operating System :: OS Independent | ||
@@ -44,3 +44,3 @@ Classifier: Programming Language :: Rust | ||
| [](https://github.com/althonos/diced/actions) | ||
| [](https://github.com/althonos/diced/actions) | ||
| [](https://codecov.io/gh/althonos/diced/) | ||
@@ -74,2 +74,4 @@ [](https://choosealicense.com/licenses/gpl-3.0/) | ||
| *This is the Python version, there is a [Rust crate](https://crates.io/crates/diced) available as well.* | ||
| ### 📋 Features | ||
@@ -76,0 +78,0 @@ |
+3
-1
@@ -5,3 +5,3 @@ # 🔪🧅 Diced [](https://github.com/althonos/diced/stargazers) | ||
| [](https://github.com/althonos/diced/actions) | ||
| [](https://github.com/althonos/diced/actions) | ||
| [](https://codecov.io/gh/althonos/diced/) | ||
@@ -31,2 +31,4 @@ [](https://choosealicense.com/licenses/gpl-3.0/) | ||
| *This is the Rust version, there is a [Python package](https://pypi.org/project/diced) available as well.* | ||
| ### 📋 Features | ||
@@ -33,0 +35,0 @@ |
+4
-2
@@ -10,3 +10,3 @@ [metadata] | ||
| long_description_content_type = text/markdown | ||
| license = MIT OR GPL-3.0-or-later | ||
| license = GPL-3.0-or-later | ||
| platform = any | ||
@@ -18,3 +18,3 @@ keywords = bioinformatics, genomics, motif, pssm, matrix | ||
| Intended Audience :: Science/Research | ||
| License :: OSI Approved :: MIT License | ||
| License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+) | ||
| Operating System :: OS Independent | ||
@@ -63,2 +63,4 @@ Programming Language :: Rust | ||
| requirements.txt | ||
| diced.tests.data = | ||
| *.fna | ||
@@ -65,0 +67,0 @@ [bdist_wheel] |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
1711956
0.19%