quickner
Advanced tools
+1
-1
| /target | ||
| /quickner-cli/target | ||
| /quickner-core/target | ||
@@ -4,0 +4,0 @@ # Byte-compiled / optimized / DLL files |
+2
-2
@@ -548,3 +548,3 @@ # This file is automatically @generated by Cargo. | ||
| name = "quickner" | ||
| version = "0.0.1-alpha.12" | ||
| version = "0.0.1-alpha.13" | ||
| dependencies = [ | ||
@@ -558,3 +558,3 @@ "pyo3", | ||
| name = "quickner-core" | ||
| version = "0.0.1-alpha.12" | ||
| version = "0.0.1-alpha.13" | ||
| dependencies = [ | ||
@@ -561,0 +561,0 @@ "clap", |
+1
-1
| [package] | ||
| name = "quickner" | ||
| version = "0.0.1-alpha.12" | ||
| version = "0.0.1-alpha.13" | ||
| edition = "2021" | ||
@@ -5,0 +5,0 @@ authors = ["Omar MHAIMDAT"] |
| [package] | ||
| name = "quickner-core" | ||
| version = "0.0.1-alpha.12" | ||
| version = "0.0.1-alpha.13" | ||
| edition = "2021" | ||
@@ -5,0 +5,0 @@ authors = ["Omar MHAIMDAT"] |
@@ -22,6 +22,2 @@ use serde::{Deserialize, Serialize}; | ||
| } | ||
| fn ne(&self, other: &Self) -> bool { | ||
| !self.eq(other) | ||
| } | ||
| } | ||
@@ -28,0 +24,0 @@ |
@@ -0,1 +1,3 @@ | ||
| use std::hash::Hash; | ||
| use serde::{Deserialize, Serialize}; | ||
@@ -7,3 +9,3 @@ | ||
| /// annotate the text. | ||
| #[derive(Eq, Hash, Serialize, Deserialize, Clone, Debug)] | ||
| #[derive(Eq, Serialize, Deserialize, Clone, Debug)] | ||
| pub struct Entity { | ||
@@ -19,1 +21,8 @@ pub name: String, | ||
| } | ||
| impl Hash for Entity { | ||
| fn hash<H: std::hash::Hasher>(&self, state: &mut H) { | ||
| self.name.hash(state); | ||
| self.label.hash(state); | ||
| } | ||
| } |
@@ -27,3 +27,3 @@ use crate::{ | ||
| pub config: Config, | ||
| pub config_file: String, | ||
| pub config_file: Option<String>, | ||
| pub documents: Vec<Document>, | ||
@@ -40,3 +40,3 @@ pub entities: Vec<Entity>, | ||
| config: Config::default(), | ||
| config_file: "./config.toml".to_string(), | ||
| config_file: Some("./config.toml".to_string()), | ||
| documents: Vec::new(), | ||
@@ -74,3 +74,3 @@ entities: Vec::new(), | ||
| // let mut annotations = Vec::new(); | ||
| let annotations = entities.iter().map(|entity| { | ||
| let annotations = entities.iter().filter_map(|entity| { | ||
| let target_len = entity.name.len(); | ||
@@ -95,18 +95,12 @@ for (start, _) in text.match_indices(entity.name.as_str()) { | ||
| { | ||
| return (start, start + target_len, entity.label.to_string()); | ||
| return Some((start, start + target_len, entity.label.to_string())); | ||
| } | ||
| } | ||
| (0, 0, String::new()) | ||
| None | ||
| }); | ||
| let annotations: Vec<(usize, usize, String)> = annotations | ||
| .filter(|(_, _, label)| !label.is_empty()) | ||
| .collect(); | ||
| // Unique annotations | ||
| let mut annotations = annotations | ||
| .into_iter() | ||
| .collect::<HashSet<(usize, usize, String)>>() | ||
| .into_iter() | ||
| .collect::<Vec<(usize, usize, String)>>(); | ||
| let mut annotations = annotations.collect::<Vec<(usize, usize, String)>>(); | ||
| annotations.sort_by(|a, b| a.0.cmp(&b.0)); | ||
| annotations.dedup(); | ||
| // Sort annotations by start index | ||
| annotations.sort_by(|a, b| a.0.cmp(&b.0)); | ||
| if !annotations.is_empty() { | ||
@@ -196,6 +190,7 @@ Some(annotations) | ||
| let config = Config::from_file(config_file.as_str()); | ||
| let mut quick = Quickner::default(); | ||
| quick.config = config; | ||
| quick.config_file = config_file; | ||
| quick | ||
| Quickner { | ||
| config, | ||
| config_file: Some(config_file), | ||
| ..Default::default() | ||
| } | ||
| } | ||
@@ -283,3 +278,3 @@ | ||
| info!("----------------------------------------"); | ||
| if self.entities.len() == 0 { | ||
| if self.entities.is_empty() { | ||
| let entities: HashSet<Entity> = self.entities( | ||
@@ -292,3 +287,3 @@ config.entities.input.path.as_str(), | ||
| } | ||
| if self.documents.len() == 0 { | ||
| if self.documents.is_empty() { | ||
| let texts: HashSet<Text> = self.texts( | ||
@@ -489,3 +484,3 @@ config.texts.input.path.as_str(), | ||
| config: Config::default(), | ||
| config_file: String::from(""), | ||
| config_file: None, | ||
| documents, | ||
@@ -547,3 +542,3 @@ entities, | ||
| config: Config::default(), | ||
| config_file: String::from(""), | ||
| config_file: None, | ||
| documents, | ||
@@ -585,3 +580,3 @@ entities, | ||
| impl Quickner { | ||
| fn build_label_index(&mut self) { | ||
| pub fn build_label_index(&mut self) { | ||
| let mut index: HashMap<String, Vec<String>> = HashMap::new(); | ||
@@ -597,3 +592,3 @@ for document in &self.documents { | ||
| fn build_entity_index(&mut self) { | ||
| pub fn build_entity_index(&mut self) { | ||
| let mut index: HashMap<String, Vec<String>> = HashMap::new(); | ||
@@ -660,5 +655,5 @@ for document in &self.documents { | ||
| fn document_hash(documents: &Vec<Document>) -> HashMap<String, Document> { | ||
| pub fn document_hash(documents: &[Document]) -> HashMap<String, Document> { | ||
| documents | ||
| .into_iter() | ||
| .iter() | ||
| .map(|document| (document.id.clone(), document.clone())) | ||
@@ -665,0 +660,0 @@ .collect::<HashMap<String, Document>>() |
+2
-1
@@ -0,1 +1,2 @@ | ||
| all: build test | ||
| .PHONY: build | ||
@@ -6,2 +7,2 @@ build: | ||
| test: | ||
| python test.py | ||
| python tests/test.py |
+1
-1
| Metadata-Version: 2.1 | ||
| Name: quickner | ||
| Version: 0.0.1a12 | ||
| Version: 0.0.1a13 | ||
| Classifier: Programming Language :: Rust | ||
@@ -5,0 +5,0 @@ Classifier: Programming Language :: Python :: Implementation :: CPython |
@@ -6,3 +6,3 @@ use crate::{ | ||
| use pyo3::prelude::*; | ||
| use quickner::{hash_string, Document, Entity}; | ||
| use quickner::{hash_string, Document}; | ||
| use serde::{Deserialize, Serialize}; | ||
@@ -78,6 +78,3 @@ | ||
| let mut annotation = Document::from_string(self.text.clone()); | ||
| let entities = entities | ||
| .into_iter() | ||
| .map(|entity| Entity::from(entity)) | ||
| .collect(); | ||
| let entities = entities.into_iter().collect(); | ||
| annotation.annotate(entities, case_sensitive); | ||
@@ -84,0 +81,0 @@ self.label.extend( |
+45
-46
@@ -37,6 +37,8 @@ use std::collections::HashMap; | ||
| pub type SpacyFormat = Vec<(String, HashMap<String, Vec<(usize, usize, String)>>)>; | ||
| #[pyclass(name = "SpacyGenerator")] | ||
| pub struct PySpacyGenerator { | ||
| #[pyo3(get)] | ||
| pub entities: Vec<Vec<(String, HashMap<String, Vec<(usize, usize, String)>>)>>, | ||
| pub entities: Vec<SpacyFormat>, | ||
| } | ||
@@ -48,3 +50,3 @@ | ||
| #[pyo3(signature = (entities))] | ||
| fn new(entities: Vec<Vec<(String, HashMap<String, Vec<(usize, usize, String)>>)>>) -> Self { | ||
| fn new(entities: Vec<SpacyFormat>) -> Self { | ||
| PySpacyGenerator { entities } | ||
@@ -57,5 +59,3 @@ } | ||
| fn __next__( | ||
| mut slf: PyRefMut<'_, Self>, | ||
| ) -> Option<Vec<(String, HashMap<String, Vec<(usize, usize, String)>>)>> { | ||
| fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<SpacyFormat> { | ||
| if slf.entities.is_empty() { | ||
@@ -94,3 +94,3 @@ PyGeneratorExit::new_err("No more entities"); | ||
| Some(documents) => { | ||
| quickner.documents = documents.into_iter().map(|x| Document::from(x)).collect(); | ||
| quickner.documents = documents.into_iter().collect(); | ||
| } | ||
@@ -101,3 +101,3 @@ None => quickner.documents = Vec::new(), | ||
| Some(entities) => { | ||
| quickner.entities = entities.into_iter().map(|x| Entity::from(x)).collect(); | ||
| quickner.entities = entities.into_iter().collect(); | ||
| } | ||
@@ -116,3 +116,7 @@ None => quickner.entities = Vec::new(), | ||
| pub fn documents(&mut self, documents: Vec<PyDocument>) { | ||
| self.documents = documents.into_iter().map(|x| x.into()).collect(); | ||
| self.documents = documents.clone(); | ||
| self.quickner.documents = documents.into_iter().collect(); | ||
| self.quickner.documents_hash = Quickner::document_hash(&self.quickner.documents); | ||
| self.quickner.build_label_index(); | ||
| self.quickner.build_entity_index(); | ||
| } | ||
@@ -122,3 +126,4 @@ | ||
| pub fn entities(&mut self, entities: Vec<PyEntity>) { | ||
| self.entities = entities.into_iter().map(|x| x.into()).collect(); | ||
| self.entities = entities.clone(); | ||
| self.quickner.entities = entities.into_iter().collect(); | ||
| } | ||
@@ -130,5 +135,4 @@ | ||
| } | ||
| match self.documents { | ||
| ref mut documents => documents.push(document.clone()), | ||
| } | ||
| let documents = &mut self.documents; | ||
| documents.push(document.clone()); | ||
| let document = Document::from(document); | ||
@@ -149,5 +153,4 @@ self.quickner.add_document(document); | ||
| } | ||
| match self.entities { | ||
| ref mut entities => entities.push(entity.clone()), | ||
| } | ||
| let entities = &mut self.entities; | ||
| entities.push(entity.clone()); | ||
| let entity = Entity { | ||
@@ -202,3 +205,3 @@ name: entity.name, | ||
| .into_iter() | ||
| .map(|document| PyDocument::from(document)) | ||
| .map(PyDocument::from) | ||
| .collect::<Vec<PyDocument>>(); | ||
@@ -210,3 +213,3 @@ self.entities = self | ||
| .into_iter() | ||
| .map(|entity| PyEntity::from(entity)) | ||
| .map(PyEntity::from) | ||
| .collect::<Vec<PyEntity>>(); | ||
@@ -308,5 +311,4 @@ Ok(()) | ||
| pub fn find_documents_by_label(&self, label: &str) -> Vec<PyDocument> { | ||
| let documents_index = match &self.quickner { | ||
| quickner => quickner.documents_label_index.to_owned(), | ||
| }; | ||
| let quickner = &self.quickner; | ||
| let documents_index = quickner.documents_label_index.to_owned(); | ||
| let documents_ids = match documents_index.get(label) { | ||
@@ -316,13 +318,12 @@ Some(documents_ids) => documents_ids, | ||
| }; | ||
| let documents = match &self.quickner { | ||
| quickner => { | ||
| let documents = documents_ids | ||
| .into_iter() | ||
| .map(|id| { | ||
| let document = quickner.documents_hash.get(id).unwrap(); | ||
| PyDocument::from(document.to_owned()) | ||
| }) | ||
| .collect(); | ||
| documents | ||
| } | ||
| let quickner = &self.quickner; | ||
| let documents = { | ||
| let documents = documents_ids | ||
| .iter() | ||
| .map(|id| { | ||
| let document = quickner.documents_hash.get(id).unwrap(); | ||
| PyDocument::from(document.to_owned()) | ||
| }) | ||
| .collect(); | ||
| documents | ||
| }; | ||
@@ -334,5 +335,4 @@ documents | ||
| pub fn find_documents_by_entity(&self, name: &str) -> Vec<PyDocument> { | ||
| let documents_entities_index = match &self.quickner { | ||
| quickner => quickner.documents_entities_index.to_owned(), | ||
| }; | ||
| let quickner = &self.quickner; | ||
| let documents_entities_index = quickner.documents_entities_index.to_owned(); | ||
| let binding = name.to_lowercase(); | ||
@@ -344,13 +344,12 @@ let name = binding.as_str(); | ||
| }; | ||
| let documents = match &self.quickner { | ||
| quickner => { | ||
| let documents = documents_ids | ||
| .into_iter() | ||
| .map(|id| { | ||
| let document = quickner.documents_hash.get(id).unwrap(); | ||
| PyDocument::from(document.to_owned()) | ||
| }) | ||
| .collect(); | ||
| documents | ||
| } | ||
| let quickner = &self.quickner; | ||
| let documents = { | ||
| let documents = documents_ids | ||
| .iter() | ||
| .map(|id| { | ||
| let document = quickner.documents_hash.get(id).unwrap(); | ||
| PyDocument::from(document.to_owned()) | ||
| }) | ||
| .collect(); | ||
| documents | ||
| }; | ||
@@ -460,3 +459,3 @@ documents | ||
| }, | ||
| config_path: quickner.config_file, | ||
| config_path: quickner.config_file.unwrap_or("".to_string()), | ||
| documents: quickner | ||
@@ -463,0 +462,0 @@ .documents |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
20733702
0