Extr
Named Entity Recognition (NER) and Relation Extraction (RE) library using Regular Expressions
Install
pip install extr
Example
text = 'Ted is a Pitcher.'
1. Find Named Entities from text.
Note: the snippets below pass re.IGNORECASE, so run `import re` before using them.
from extr import RegEx, RegExLabel
from extr.entities import EntityExtractor
entity_extractor = EntityExtractor([
RegExLabel('PERSON', [
RegEx([r'ted'], re.IGNORECASE)
]),
RegExLabel('POSITION', [
RegEx([r'pitcher'], re.IGNORECASE)
]),
])
entities = entity_extractor.get_entities(text)
Alternatively, add a knowledge base (kb) that maps each label to a list of known terms:
from extr import RegEx, RegExLabel
from extr.entities import create_entity_extractor
entity_extractor = create_entity_extractor(
[
RegExLabel('POSITION', [
RegEx([r'pitcher'], re.IGNORECASE)
]),
],
kb={
'PERSON': ['Ted']
}
)
entities = entity_extractor.get_entities(text)
2. Visualize Entities in HTML
Annotate text to display in HTML.
from extr.entities.viewers import HtmlViewer
viewer = HtmlViewer()
viewer.append(text, entities)
html = viewer.create_view(custom_styles="""
.lb-PERSON {
background-color: orange;
}
.lb-POSITION {
background-color: yellow;
}
""")

3. Annotate and Extract Relationships between Entities
from extr.entities import EntityAnnotator
from extr.relations import RelationExtractor, \
RegExRelationLabelBuilder
relationship = RegExRelationLabelBuilder('is_a') \
.add_e1_to_e2(
'PERSON',
[
r'\s+is\s+a\s+',
],
'POSITION'
) \
.build()
relations_to_extract = [relationship]
annotated_text = EntityAnnotator().annotate(text, entities)
relations = RelationExtractor(relations_to_extract).extract(annotated_text, entities)