replacy - PyPI Package Compare versions

+1

-1

PKG-INFO

		Metadata-Version: 2.1
		Name: replacy
		Version: 3.6.1
		Version: 2.1.0
		Summary: ReplaCy = spaCy Matcher + pyInflect. Create rules, correct sentences.
		@@ -5,0 +5,0 @@ License: MIT

+4

-4

pyproject.toml

		[tool.poetry]
		name = "replacy"
		version = "3.6.1"
		name = "replaCy"
		version = "2.1.0"
		description = "ReplaCy = spaCy Matcher + pyInflect. Create rules, correct sentences."
		@@ -26,4 +26,4 @@ authors = [
		pytest = "^5.3.2"
		spacy= "^3.0.6"
		en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz" }
		spacy= "2.2.0"
		en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz" }
		kenlm = { git = "https://github.com/kpu/kenlm", rev = "master" }

+51

-59

replacy/__init__.py

		@@ -126,3 +126,2 @@ import copy
		self.matcher = Matcher(self.nlp.vocab)
		self.predicates = {}
		self._init_matcher()
		@@ -159,13 +158,9 @@ self.spans: List[Span] = []
		match_hooks = ps.get("match_hook", [])
		self.predicates[match_name] = get_predicates(
		match_hooks, self.default_match_hooks, self.custom_match_hooks
		)
		self.matcher.add(match_name, patterns)
		callback = self._get_callback(match_name, match_hooks)
		self._add_matcher_rule(match_name, patterns, callback)

		@staticmethod
		def _fix_alignment_multiple_whitespaces(alignments):
		return [int(a / 2) for a in alignments]
		def _add_matcher_rule(self, match_name, patterns, callback):
		self.matcher.add(match_name, callback, patterns)

		@staticmethod
		def _allow_multiple_whitespaces(patterns):
		def _allow_multiple_whitespaces(self, patterns):
		"""
		@@ -178,56 +173,60 @@ allow matching tokens separated by multiple whitespaces
		"""
		if True:
		if self.allow_multiple_whitespaces:
		white_pattern = {"IS_SPACE": True, "OP": "?"}
		normalized_patterns = []
		for pattern in patterns:
		normalized_pattern = [white_pattern]
		for p in pattern:
		normalized_pattern += [p, white_pattern]
		normalized_patterns.append(normalized_pattern)
		normalized_patterns = [white_pattern]
		for p in patterns:
		normalized_patterns += [p, white_pattern]
		patterns = normalized_patterns
		return patterns

		@staticmethod
		def _remove_unsupported(patterns):
		def _remove_unsupported(self, patterns):
		# remove custom attributes not supported by spaCy Matcher
		for pattern in patterns:
		for p in pattern:
		if "TEMPLATE_ID" in p:
		del p["TEMPLATE_ID"]
		for p in patterns:
		if "TEMPLATE_ID" in p:
		del p["TEMPLATE_ID"]
		return patterns

		def _callback(self, doc, match):
		match_id, start, end, alignments = match
		alignments = ReplaceMatcher._fix_alignment_multiple_whitespaces(alignments)
		def _get_callback(self, match_name, match_hooks):
		"""
		Most matches have the same logic to be executed each time a match is found
		Some matches have extra logic, defined in match_hooks
		"""
		# Get predicates once, callback is returned in a closure with this information
		predicates = get_predicates(
		match_hooks, self.default_match_hooks, self.custom_match_hooks
		)

		match_name = self.nlp.vocab[match_id].text
		def cb(matcher, doc, i, matches):
		match_id, start, end = matches[i]

		for pred in self.predicates[match_name]:
		try:
		if pred(doc, start, end):
		return None
		except IndexError:
		break
		for pred in predicates:
		try:
		if pred(doc, start, end):
		return None
		except IndexError:
		break
		match_name = self.nlp.vocab[match_id].text
		span = self.Span(doc, start, end)

		span = self.Span(doc, start, end)
		# find in match_dict if needed
		span._.match_name = match_name

		# find in match_dict if needed
		span._.match_name = match_name
		pre_suggestions = self.match_dict[match_name]["suggestions"]

		pre_suggestions = self.match_dict[match_name]["suggestions"]
		span._.suggestions = []

		span._.suggestions = []
		for i, x in enumerate(pre_suggestions):
		span._.suggestions += self.process_suggestions(
		x, doc, start, end, match_name, i
		)

		for i, x in enumerate(pre_suggestions):
		span._.suggestions += self.process_suggestions(
		x, doc, start, end, match_name, i, alignments
		)
		for novel_prop, default_value in self.novel_prop_defaults.items():
		setattr(
		span._,
		novel_prop,
		self.match_dict[match_name].get(novel_prop, default_value),
		)
		self.spans.append(span)

		for novel_prop, default_value in self.novel_prop_defaults.items():
		setattr(
		span._,
		novel_prop,
		self.match_dict[match_name].get(novel_prop, default_value),
		)
		self.spans.append(span)
		return cb

		@@ -279,3 +278,3 @@ def _set_scorer(self, lm_path):
		def process_suggestions(
		self, pre_suggestion, doc, start, end, match_name, pre_suggestion_id, alignments
		self, pre_suggestion, doc, start, end, match_name, pre_suggestion_id
		):
		@@ -286,3 +285,3 @@ # get token <-> pattern correspondence
		suggestion_variants = self.suggestion_gen(
		pre_suggestion, doc, start, end, pattern, pre_suggestion_id, alignments
		pre_suggestion, doc, start, end, pattern, pre_suggestion_id
		)
		@@ -386,10 +385,3 @@ # assert there aren't more than max_suggestions_count
		# this fills up self.spans
		matches = self.matcher(doc, with_alignments=True)

		# run the callback here instead of passing it to the matcher as an on-match callback:
		# here we have the alignment information needed for pattern refs,
		# which is not available inside an on-match callback
		for match in matches:
		self._callback(doc, match)

		self.matcher(doc)
		for _, component in self.pipeline:
		@@ -396,0 +388,0 @@ # the default pipeline will:

+0

-1

replacy/db.py

		@@ -72,3 +72,2 @@ import json
		import kenlm

		return kenlm.Model(model_path)

+11

-29

replacy/default_match_hooks.py

		@@ -181,8 +181,6 @@ """

		def relative_x_is_y(
		children_or_ancestors: str, pos_or_dep: str, value: Union[str, List[str]]
		) -> SpacyMatchPredicate:
		"""
		This hook looks at all the tokens in a matched span to determine
		whether any of the children or the first ancestor have a given .pos_ or
		def relative_x_is_y(children_or_ancestors: str, pos_or_dep: str, value: Union[str, List[str]]) -> SpacyMatchPredicate:
		'''
		This hook looks at all the tokens in a matched span to determine
		whether any of the children or the first ancestor have a given .pos_ or
		.dep_. This replaces the implementation of the Dependency Matcher in
		@@ -202,3 +200,3 @@ the previous version by looking at token.children or token.ancestors in
		}
		"""
		'''

		@@ -215,8 +213,6 @@ if not isinstance(value, list):
		if children_or_ancestors not in ["children", "ancestors"]:
		raise ValueError(
		"children_or_ancestors must be set to either `children` or `ancestors`"
		)
		raise ValueError("children_or_ancestors must be set to either `children` or `ancestors`")

		if pos_or_dep not in ["pos", "dep", "tag"]:
		raise ValueError("pos_or_dep must be set to either `pos`, `dep`, or `tag`!")
		if pos_or_dep not in ["pos", "dep"]:
		raise ValueError("pos_or_dep must be set to either `pos` or `dep`!")

		@@ -229,14 +225,6 @@ def _in_children(doc, start, end):
		if pos_or_dep == "pos":
		return any(
		[child.pos_ == val for tok in match_span for child in tok.children]
		)
		return any([child.pos_ == val for tok in match_span for child in tok.children])
		elif pos_or_dep == "dep":
		return any(
		[child.dep_ == val for tok in match_span for child in tok.children]
		)
		elif pos_or_dep == "tag":
		return any(
		[child.tag_ == val for tok in match_span for child in tok.children]
		)

		return any([child.dep_ == val for tok in match_span for child in tok.children])

		def _in_ancestors(doc, start, end):
		@@ -259,8 +247,2 @@ if end >= len(doc):
		return False
		if pos_or_dep == "tag":
		for t in match_span:
		ancestor = list(t.ancestors)[0] if len(list(t.ancestors)) else None
		if ancestor and ancestor.tag_ == val:
		return True
		return False

		@@ -267,0 +249,0 @@ if children_or_ancestors == "children":

+3

-3

replacy/filter_0distance.py

		@@ -12,3 +12,3 @@ from typing import List
		for suggestion in span.suggestions:
		if (span.doc[span.start : span.end].text) == suggestion:
		if (span.doc[span.start:span.end].text) == suggestion:
		continue
		@@ -29,3 +29,3 @@ suggestions.append(suggestion)
		if len(span.suggestions):
		span_text = span.doc[span.start : span.end].text.rstrip(" \r\n")
		span_text = span.doc[span.start:span.end].text.rstrip(" \r\n")
		suggestions = []
		@@ -42,2 +42,2 @@ for suggestion in span.suggestions:
		filtered_spans.append(span)
		return filtered_spans
		return filtered_spans

+28

-0

replacy/inflector.py

		@@ -202,1 +202,29 @@ import warnings
		return self.inflect_string(word, tag=tag, pos=pos)

		def insert(self, doc, suggestion: str, index: int):
		    """
		    Return the sentence text with the token at ``index`` replaced by the
		    inflected form of ``suggestion``.

		    ex. She washed her eggs. -> She ate her eggs.

		    ``doc`` may be a doc-like object exposing ``.text`` or a plain string;
		    a string is first converted with ``self.nlp``. If several inflections
		    are returned, the first form is used. If inflection is not supported
		    (no forms returned, or the first form is empty), the original sentence
		    text is returned unchanged.
		    """
		    # Anything without a ``.text`` attribute is treated as raw text and
		    # converted to a doc first.
		    if not hasattr(doc, "text"):
		        doc = self.nlp(doc)

		    infl_tokens = self.auto_inflect(doc, suggestion, index)

		    # Bind the candidate unconditionally: the previous version left it
		    # undefined when no inflection came back and then crashed on the check.
		    infl_token = infl_tokens[0] if len(infl_tokens) else None
		    if not infl_token:
		        return doc.text

		    # Splice the inflected form over the character range of the old token.
		    token = doc[index]
		    return "".join(
		        [doc.text[: token.idx], infl_token, doc.text[token.idx + len(token) :]]
		    )

+171

-11

replacy/ref_matcher.py

		@@ -7,12 +7,172 @@ import copy
		class RefMatcher:
		def __call__(self, span, orig_pattern, alignments):
		    """
		    Group span-token offsets by the pattern index they are aligned to.

		    ``alignments[k]`` is the pattern index for the k-th token of the span;
		    the result maps each pattern index to the list of token offsets that
		    carry it. ``span`` and ``orig_pattern`` are not used; they are accepted
		    only to keep the call signature uniform.
		    """
		    # Seed one bucket per distinct pattern index, then fill in one pass.
		    pattern_ref = {pattern_idx: [] for pattern_idx in set(alignments)}
		    for span_token_idx, pattern_idx in enumerate(alignments):
		        pattern_ref[pattern_idx].append(span_token_idx)
		    return pattern_ref
		def __init__(self, nlp):
		    """Store the pipeline and create a private Matcher on its vocab."""
		    self.nlp = nlp
		    vocab = nlp.vocab
		    # Scratch matcher: rules are added and removed again by the other methods.
		    self.matcher = Matcher(vocab)

		def clean_matcher(self):
		# no native method to clean spaCy matcher
		# or retrieve pattern names
		# so always add ints, starting from zero
		# and clean ints from 0 till not found
		i = 0
		while len(self.matcher) > 0 and i < 100:
		if i in self.matcher:
		self.matcher.remove(i)
		i += 1

		@staticmethod
		def is_negative(p):
		if "OP" in p and p["OP"] == "!":
		return True
		return False

		@staticmethod
		def is_droppable(p):
		if "OP" in p and p["OP"] in ["*", "?"]:
		return True
		return False

		@staticmethod
		def is_multitoken(p):
		if "OP" in p and p["OP"] in ["*", "+"]:
		return True
		return False

		def remove_skipped_ops(self, span, pattern):
		"""
		Work out which optional ("?" / "*") pattern items matched nothing in ``span``.

		For every droppable item, a copy of the pattern in which that item is
		made mandatory is registered on ``self.matcher`` under the item's index.
		Copies that still match the whole span identify the optional items
		that were actually used.

		NOTE: the items of ``pattern`` are modified in place in the second loop
		(they may gain "TEXT" or have "OP" rewritten / removed).

		Returns a tuple ``(non_op_pattern, skipped_idx)``: the pattern items
		that were kept, and the indexes into ``pattern`` of the skipped ones.
		"""
		skipped_idx = []

		# indexes of the items that are allowed to match zero tokens
		op_tokens = [i for (i, p) in enumerate(pattern) if RefMatcher.is_droppable(p)]

		for op in op_tokens:
		# work on a copy so each variant changes exactly one item
		op_pattern = copy.deepcopy(pattern)
		# remove "?" to require 1 instead of 0
		if op_pattern[op]["OP"] == "?":
		if len(op_pattern[op]) == 1:
		# if there are no other props,
		# add a dummy string that will never match
		# since it's not 1 token :)
		op_pattern[op]["TEXT"] = "alice and bob"
		op_pattern[op]["OP"] = "!"
		# NOTE(review): this delete also removes the "!" assigned just above;
		# the parallel branch in the second loop uses `else:` before its delete.
		# Original indentation is not visible here -- confirm which is intended.
		del op_pattern[op]["OP"]
		# change "*" to "+", to require 1+ instead of 0+
		elif op_pattern[op]["OP"] == "*":
		op_pattern[op]["OP"] = "+"
		# rule key == index of the item that was made mandatory
		self.matcher.add(op, None, op_pattern)

		# check whether it still matches
		matches = self.matcher(span.as_doc())
		# keep only the rule keys whose match covers the span from first to last token
		max_matches = [m for (m, s, e) in matches if (s == 0) and (e == len(span))]

		# clean the matcher
		self.clean_matcher()

		non_op_pattern = []
		for i, p in enumerate(pattern):
		# is optional
		if "OP" in p:
		# but not found
		if not i in max_matches and not RefMatcher.is_negative(p):
		# => mark as not matched and skip
		skipped_idx.append(i)
		continue
		else:
		# NOTE(review): only "?" / "*" items are registered above, so the index
		# of a "+" item cannot be in max_matches and such an item appears to be
		# skipped before reaching this test -- confirm whether "?" was meant.
		if p["OP"] == "+":
		if len(p) == 1:
		# if there are no other props,
		# add a dummy string that will never match
		# since it's not 1 token :)
		p["TEXT"] = "alice and bob"
		p["OP"] = "!"
		else:
		del p["OP"]
		elif p["OP"] == "*":
		p["OP"] = "+"
		non_op_pattern.append(p)

		return non_op_pattern, skipped_idx

		def insert_empty_idx(self, pattern_ref, idx):
		    """
		    Return a copy of ``pattern_ref`` with an empty entry opened at ``idx``.

		    Every key greater than or equal to ``idx`` is moved up by one; smaller
		    keys are kept. The new ``idx`` key maps to an empty list and is added
		    last. The input mapping itself is not modified.
		    """
		    shifted = {
		        (key + 1 if key >= idx else key): tokens
		        for key, tokens in pattern_ref.items()
		    }
		    shifted[idx] = []
		    return shifted

		def shift_pattern_ref(self, pattern_ref, skipped_idx):
		for idx in skipped_idx:
		pattern_ref = self.insert_empty_idx(pattern_ref, idx)
		return pattern_ref

		def __call__(self, span, orig_pattern):
		"""
		Map each item of ``orig_pattern`` to the span tokens it matched.

		Returns a dict ``{pattern item index: [token offsets within span]}``.
		Items recorded as skipped by ``remove_skipped_ops`` map to an empty
		list. ``orig_pattern`` is deep-copied first and is not modified here.
		"""

		pattern = copy.deepcopy(orig_pattern)

		# remove props not supported by SpaCy matcher:
		for p in pattern:
		if "TEMPLATE_ID" in p:
		del p["TEMPLATE_ID"]

		# case I: tokens <-> patterns
		# if lengths match
		# if no OP
		# => everything has been matched
		if len(span) == len(pattern) and not any(["OP" in p for p in pattern]):
		return {k: [k] for k in range(len(pattern))}

		# check which tokens are matched, remove non matched
		non_op_pattern, skipped_idx = self.remove_skipped_ops(span, pattern)

		# case II:
		# if lengths match
		# if no multitoken OPs
		# => everything has been matched
		if len(span) == len(non_op_pattern) and not any(
		[RefMatcher.is_multitoken(p) for p in non_op_pattern]
		):
		pattern_ref = {k: [k] for k in range(len(non_op_pattern))}
		return self.shift_pattern_ref(pattern_ref, skipped_idx)

		# case III:
		# worst case
		# get shifts for multitokens
		# ie rematching cropped spans and patterns

		# A. get cropped patterns
		# rule key i == the pattern with its first i items cut off
		for i in range(len(non_op_pattern)):
		self.matcher.add(i, None, non_op_pattern[i:])

		# B. get cropped spans
		# docs[i] == the span with its first i tokens cut off
		docs = [span[i:].as_doc() for i in range(len(span))]

		# C. rematch
		matches = self.matcher.pipe(docs, batch_size=len(span), return_matches=True)

		# D. get pattern_ref
		pattern_ref = {}

		for i, (d, m) in enumerate(matches):
		# take max span match for doc
		if len(m):
		# len 0 shouldn't happen except weird white spaces
		m_id, m_start, m_end = max(m, key=lambda x: x[2] - x[1])
		# NOTE(review): m_start is not checked against 0 -- presumably the
		# longest match is expected to start at the first token; confirm.

		# if cropped span matches cropped pattern
		# 1st token of cropped span belongs to 1st cropped pattern item
		if not m_id in pattern_ref:
		pattern_ref[m_id] = [i]
		else:
		# no changes in pattern
		# pattern item had more tokens matched
		# ex. "very fast ..." & "fast ... "
		# matched with {"POS": "ADJ", "OP": "+"} ...
		pattern_ref[m_id].append(i)

		# clean
		self.clean_matcher()

		# shift by skipped ops
		pattern_ref = self.shift_pattern_ref(pattern_ref, skipped_idx)
		return pattern_ref

+39

-61

replacy/resources/match_dict_schema.json

		@@ -5,3 +5,3 @@ {
		"definitions": {
		"replacyAttributeItem": {
		"replacyAttribute": {
		"type": "object",
		@@ -57,9 +57,2 @@ "properties": {
		},
		"replacyAttribute": {
		"type": "array",
		"items": {
		"$ref": "#/definitions/replacyAttributeItem"
		},
		"minItems": 1
		},
		"spacyOperator": {
		@@ -109,3 +102,3 @@ "type": "object",
		},
		"textOperator": {
		"textOperator":{
		"type": "object",
		@@ -149,53 +142,38 @@ "additionalProperties": false,
		"properties": {
		"TEXT": {
		"$ref": "#/definitions/textValue"
		},
		"FROM_TEMPLATE_ID": {
		"type": "integer"
		},
		"PATTERN_REF": {
		"type": "integer"
		},
		"REPLACY_OP": {
		"enum": ["LOWER", "UPPER", "TITLE"]
		},
		"INFLECTION": {
		"enum": [
		"ADJ",
		"ADV",
		"PROPN",
		"VERB",
		"AUX",
		"JJ",
		"JJR",
		"JJS",
		"RB",
		"RBR",
		"RBS",
		"NN",
		"NNS",
		"NNP",
		"NNPS",
		"VB",
		"VBD",
		"VBG",
		"VBN",
		"VBP",
		"VBZ",
		"MD",
		"ALL"
		]
		},
		"REGEX": {
		"type": "string",
		"minLength": 2
		},
		"SUFFIX": {
		"type": "string",
		"minLength": 1
		},
		"PREFIX": {
		"type": "string",
		"minLength": 1
		}
		"TEXT": {
		"$ref": "#/definitions/textValue"
		},
		"FROM_TEMPLATE_ID": {
		"type": "integer"
		},
		"PATTERN_REF": {
		"type": "integer"
		},
		"INFLECTION": {
		"enum": [
		"ADJ",
		"ADV",
		"PROPN",
		"VERB",
		"AUX",
		"JJ",
		"JJR",
		"JJS",
		"RB",
		"RBR",
		"RBS",
		"NN",
		"NNS",
		"NNP",
		"NNPS",
		"VB",
		"VBD",
		"VBG",
		"VBN",
		"VBP",
		"VBZ",
		"MD",
		"ALL"
		]
		}
		}
		@@ -245,3 +223,3 @@ },
		"patternProperties": {
		"^[a-z_-][A-Za-z0-9_-]*$": {
		"^[a-z_][A-Za-z0-9_]*$": {
		"type": "object",
		@@ -248,0 +226,0 @@ "properties": {

+234

-311

replacy/resources/match_dict.json

		{
		"extract-revenge": {
		"patterns": [
		[
		{
		"LEMMA": "extract",
		"TEMPLATE_ID": 1
		}
		]
		],
		"suggestions": [
		[
		{
		"TEXT": "exact",
		"FROM_TEMPLATE_ID": 1
		}
		]
		],
		"match_hook": [
		{
		"name": "succeeded_by_phrase",
		"args": "revenge",
		"match_if_predicate_is": true
		}
		],
		"test": {
		"positive": [
		"And at the same time extract revenge on those he so despises?",
		"Watch as Tampa Bay extracts revenge against his former Los Angeles Rams team."
		],
		"negative": [
		"Mother flavours her custards with lemon extract."
		]
		"extract-revenge": {
		"patterns": [
		{
		"LEMMA": "extract",
		"TEMPLATE_ID": 1
		}
		],
		"suggestions": [
		[
		{
		"TEXT": "exact",
		"FROM_TEMPLATE_ID": 1
		}
		},
		"make-due": {
		"patterns": [
		[
		{
		"LEMMA": "make",
		"TEMPLATE_ID": 1
		},
		{
		"LOWER": "due"
		}
		]
		],
		"suggestions": [
		[
		{
		"TEXT": "make",
		"FROM_TEMPLATE_ID": 1
		},
		{
		"TEXT": "do"
		}
		]
		],
		"test": {
		"positive": [
		"Viewers will have to make due with tired re-runs and second-rate movies."
		],
		"negative": [
		"The empty vessels make the greatest sound.",
		"I'll make do.",
		"She only has sons; she'll make dudes."
		]
		]
		],
		"match_hook": [
		{
		"name": "succeeded_by_phrase",
		"args": "revenge",
		"match_if_predicate_is": true
		}
		],
		"test": {
		"positive": [
		"And at the same time extract revenge on those he so despises?",
		"Watch as Tampa Bay extracts revenge against his former Los Angeles Rams team."
		],
		"negative": ["Mother flavours her custards with lemon extract."]
		}
		},
		"make-due": {
		"patterns": [
		{
		"LEMMA": "make",
		"TEMPLATE_ID": 1
		},
		{
		"LOWER": "due"
		}
		],
		"suggestions": [
		[
		{
		"TEXT": "make",
		"FROM_TEMPLATE_ID": 1
		},
		"comment": "this is an example comment",
		"description": "The expression is \"make do\".",
		"category": "R:VERB",
		"unexpected": "replaCy should handle arbitrary properties here, and attach them to the relevant spans"
		},
		"requirement": {
		"patterns": [
		[
		{
		"LEMMA": "requirement",
		"POS": "NOUN",
		"TEMPLATE_ID": 1
		}
		]
		],
		"suggestions": [
		[
		{
		"TEXT": "need",
		"FROM_TEMPLATE_ID": 1
		}
		]
		],
		"match_hook": [
		{
		"name": "part_of_compound",
		"match_if_predicate_is": false
		},
		{
		"name": "preceded_by_lemma",
		"kwargs": {
		"lemma": "hello",
		"distance": 22
		},
		"match_if_predicate_is": false
		}
		],
		"test": {
		"positive": [
		"The system has the following requirements: blood of a virgin, suffering, and cat food.",
		"Our immediate requirement is extra staff."
		],
		"negative": [
		"There is a residency requirement for obtaining citizenship.",
		"What is the minimum entrance requirement for this course?"
		]
		{
		"TEXT": "do"
		}
		]
		],
		"test": {
		"positive": [
		"Viewers will have to make due with tired re-runs and second-rate movies."
		],
		"negative": [
		"The empty vessels make the greatest sound.",
		"I'll make do.",
		"She only has sons; she'll make dudes."
		]
		},
		"lt-example": {
		"patterns": [
		[
		{
		"LOWER": {
		"IN": [
		"have",
		"has"
		]
		}
		},
		{
		"TAG": {
		"IN": [
		"VBD",
		"VBP",
		"VB",
		"VBN"
		]
		}
		},
		{
		"TAG": {
		"NOT_IN": [
		"VBG"
		]
		}
		}
		]
		],
		"suggestions": [
		[
		{
		"PATTERN_REF": 0
		},
		{
		"PATTERN_REF": 1,
		"INFLECTION": "VBN"
		},
		{
		"PATTERN_REF": 2
		}
		]
		],
		"description": "Possible agreement error -- use past participle here",
		"test": {
		"positive": [
		"I have eat this"
		],
		"negative": [
		"I ate this"
		]
		"comment": "this is an example comment",
		"description": "The expression is \"make do\".",
		"category": "R:VERB",
		"unexpected": "replaCy should handle arbitrary properties here, and attach them to the relevant spans"
		},
		"requirement": {
		"patterns": [
		{
		"LEMMA": "requirement",
		"POS": "NOUN",
		"TEMPLATE_ID": 1
		}
		],
		"suggestions": [
		[
		{
		"TEXT": "need",
		"FROM_TEMPLATE_ID": 1
		}
		},
		"assemble_attach_together": {
		"comment": "Match the word together if it is a modifier of any form of assemble or attach, and suggest removing it",
		"patterns": [
		[
		{
		"LOWER": "together"
		}
		]
		],
		"match_hook": [
		{
		"name": "relative_x_is_y",
		"kwargs": {
		"children_or_ancestors": "ancestors",
		"pos_or_dep": "dep",
		"value": "ROOT"
		},
		"match_if_predicate_is": true
		}
		],
		"suggestions": [
		[
		{
		"TEXT": ""
		}
		]
		],
		"test": {
		"positive": [
		"Avengers, assemble the team together!",
		"We assembled the furniture together."
		],
		"negative": [
		"After we assemble, we can go together",
		"My arm is attached to my shoulder, I like that they are together."
		]
		]
		],
		"match_hook": [
		{
		"name": "part_of_compound",
		"match_if_predicate_is": false
		},
		{
		"name": "preceded_by_lemma",
		"kwargs": {
		"lemma": "hello",
		"distance": 22
		},
		"match_if_predicate_is": false
		}
		],
		"test": {
		"positive": [
		"The system has the following requirements: blood of a virgin, suffering, and cat food.",
		"Our immediate requirement is extra staff."
		],
		"negative": [
		"There is a residency requirement for obtaining citizenship.",
		"What is the minimum entrance requirement for this course?"
		]
		}
		},
		"lt-example": {
		"patterns": [
		{
		"LOWER": {
		"IN": ["have", "has"]
		}
		},
		"effective_in_its_ability": {
		"patterns": [
		[
		{
		"LEMMA": "be",
		"TEMPLATE_ID": 1
		},
		{
		"LOWER": "effective"
		},
		{
		"LOWER": "in"
		},
		{
		"DEP": "poss"
		},
		{
		"LOWER": "ability"
		},
		{
		"LOWER": "to"
		},
		{
		"POS": "VERB"
		}
		]
		],
		"suggestions": [
		[
		{
		"TEXT": "effectively"
		},
		{
		"PATTERN_REF": 6,
		"FROM_TEMPLATE_ID": 1
		}
		]
		],
		"comment": "You can use pattern_ref and from_template_id together",
		"test": {
		"positive": [
		"The pail was effective in its ability to carry water"
		],
		"negative": [
		"The pail wasn't effective in its ability to carry water"
		]
		},
		{
		"TAG": {
		"IN": ["VBD", "VBP", "VB"]
		}
		},
		"dupe-test": {
		"patterns": [
		[
		{
		"LEMMA": "make",
		"TEMPLATE_ID": 1
		}
		]
		],
		"suggestions": [
		[
		{
		"TEXT": "build",
		"FROM_TEMPLATE_ID": 1
		}
		]
		],
		"comment": "This is a bad match, it is here to demonstrate overlap behavior",
		"test": {
		"positive": [
		"I will make something"
		],
		"negative": [
		"I will build something"
		]
		},
		{
		"TAG": {
		"NOT_IN": ["VBG"]
		}
		},
		"all-caps": {
		"patterns": [
		[
		{
		"IS_UPPER": true,
		"TEXT": {
		"REGEX": "^[A-Z]{2,}$"
		},
		"OP": "+"
		},
		{
		"IS_LOWER": true,
		"OP": "*"
		}
		]
		],
		"suggestions": [
		[
		{
		"PATTERN_REF": 0,
		"REPLACY_OP": "LOWER"
		},
		{
		"PATTERN_REF": 1,
		"REPLACY_OP": "UPPER"
		}
		]
		],
		"test": {
		"positive": [
		"TENNIS is a lovely game.",
		"THIS IS SO SILLY",
		"THIS IS SO SILLY waay to go"
		],
		"negative": [
		"this is so silly"
		]
		}
		],
		"suggestions": [
		[
		{
		"PATTERN_REF": 0
		},
		{
		"PATTERN_REF": 1,
		"INFLECTION": "VBN"
		},
		{
		"PATTERN_REF": 2
		}
		]
		],
		"description": "Possible agreement error -- use past participle here",
		"test": {
		"positive": ["I have eat this"],
		"negative": ["I have eaten this"]
		}
		},
		"assemble_attach_together": {
		"comment": "Match the word together if it is a modifier of any form of assemble or attach, and suggest removing it",
		"patterns": [
		{
		"LOWER": "together"
		}
		],
		"match_hook": [
		{
		"name": "relative_x_is_y",
		"kwargs": {
		"children_or_ancestors": "ancestors",
		"pos_or_dep": "dep",
		"value": "ROOT"
		},
		"match_if_predicate_is": true
		}
		],
		"suggestions": [
		[
		{
		"TEXT": ""
		}
		]
		],
		"test": {
		"positive": [
		"Avengers, assemble the team together!",
		"We assembled the furniture together."
		],
		"negative": [
		"After we assemble, we can go together",
		"My arm is attached to my shoulder, I like that they are together."
		]
		}
		},
		"effective_in_its_ability": {
		"patterns": [
		{
		"LEMMA": "be",
		"TEMPLATE_ID": 1
		},
		{
		"LOWER": "effective"
		},
		{
		"LOWER": "in"
		},
		{
		"DEP": "poss"
		},
		{
		"LOWER": "ability"
		},
		{
		"LOWER": "to"
		},
		{
		"POS": "VERB"
		}
		],
		"suggestions": [
		[
		{
		"TEXT": "effectively"
		},
		{
		"PATTERN_REF": 6,
		"FROM_TEMPLATE_ID": 1
		}
		]
		],
		"comment": "You can use pattern_ref and from_template_id together",
		"test": {
		"positive": ["The pail was effective in its ability to carry water"],
		"negative": ["The pail wasn't effective in its ability to carry water"]
		}
		},
		"dupe-test": {
		"patterns": [
		{
		"LEMMA": "make",
		"TEMPLATE_ID": 1
		}
		],
		"suggestions": [
		[
		{
		"TEXT": "build",
		"FROM_TEMPLATE_ID": 1
		}
		]
		],
		"comment": "This is a bad match, it is here to demonstrate overlap behavior",
		"test": {
		"positive": ["I will make something"],
		"negative": ["I will build something"]
		}
		}
		}

+1

-5

replacy/suggestion_joiner.py

		@@ -8,7 +8,3 @@ from typing import List
		for span in spans:
		suggestions_separator = (
		span.suggestions_separator
		if span.has_extension("suggestions_separator")
		else " "
		)
		suggestions_separator = span.suggestions_separator if span.has_extension('suggestions_separator') else " "
		suggestions: List[str] = []
		@@ -15,0 +11,0 @@ for s in span._.suggestions:

+12

-15

replacy/suggestion.py

		@@ -16,3 +16,3 @@ import re
		self.inflector = Inflector(nlp=nlp, forms_lookup=self.forms_lookup)
		self.ref_matcher = RefMatcher()
		self.ref_matcher = RefMatcher(nlp)
		self.filter_suggestions = filter_suggestions
		@@ -35,9 +35,9 @@ self.default_max_count = default_max_count
		try:
		refd_text = None
		if ref in pattern_ref:
		refd_tokens = pattern_ref[ref]
		if len(refd_tokens):
		min_i = start + min(refd_tokens)
		max_i = start + max(refd_tokens)
		refd_text = doc[min_i : max_i + 1].text
		refd_tokens = pattern_ref[ref]
		if len(refd_tokens):
		min_i = start + min(refd_tokens)
		max_i = start + max(refd_tokens)
		refd_text = doc[min_i : max_i + 1].text
		else:
		refd_text = None
		except:
		@@ -272,5 +272,3 @@ warnings.warn(

		def __call__(
		self, pre_suggestion, doc, start, end, pattern, pre_suggestion_id, alignments
		):
		def __call__(self, pre_suggestion, doc, start, end, pattern, pre_suggestion_id):
		"""
		@@ -295,4 +293,3 @@ Suggestion text:
		# get token <-> pattern correspondence
		pattern_obj = pattern[0]
		pattern_ref = self.ref_matcher(doc[start:end], pattern_obj, alignments)
		pattern_ref = self.ref_matcher(doc[start:end], pattern)

		@@ -303,3 +300,3 @@ generated_suggestions_obj = []
		item_options = SuggestionGenerator.get_options(
		item, doc, start, end, pattern_obj, pattern_ref
		item, doc, start, end, pattern, pattern_ref
		)
		@@ -315,3 +312,3 @@ item["generated"] = item_options
		generated_suggestions_obj,
		pattern_obj,
		pattern,
		pattern_ref,
		@@ -318,0 +315,0 @@ doc,

+6

-15

replacy/test_helper.py

		@@ -11,6 +11,5 @@ import unittest
		class MatchDictTestHelper(unittest.TestCase):

		@staticmethod
		def generate_cases(
		match_dict: Dict[str, Any]
		) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]:
		def generate_cases(match_dict: Dict[str, Any]) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]:
		positives: List[Tuple[str, str]] = []
		@@ -33,5 +32,3 @@ negatives: List[Tuple[str, str]] = []
		cls.r_matcher = ReplaceMatcher(nlp, match_dict)
		cls.positive_cases, cls.negative_cases = MatchDictTestHelper.generate_cases(
		match_dict
		)
		cls.positive_cases, cls.negative_cases = MatchDictTestHelper.generate_cases(match_dict)

		@@ -41,5 +38,3 @@ def test_positive(self):
		spans = self.r_matcher(positive_sent)
		spans_from_this_rule = list(
		filter(lambda s: s._.match_name == match_name, spans)
		)
		spans_from_this_rule = list(filter(lambda s: s._.match_name == match_name, spans))
		print(match_name, positive_sent)
		@@ -51,8 +46,4 @@ assert len(spans_from_this_rule) > 0, "Positive case should trigger rule"
		spans = self.r_matcher(negative_sent)
		spans_from_this_rule = list(
		filter(lambda s: s._.match_name == match_name, spans)
		)
		spans_from_this_rule = list(filter(lambda s: s._.match_name == match_name, spans))
		print(match_name, negative_sent)
		assert (
		len(spans_from_this_rule) == 0
		), "Negative case should NOT trigger rule"
		assert len(spans_from_this_rule) == 0, "Negative case should NOT trigger rule"

+0

-1

replacy/util.py

		import warnings
		from typing import Any, Callable, Dict, List, Union

		import spacy
		from functional import seq
		@@ -6,0 +5,0 @@ from jsonschema import validate

+1

-1

replacy/version.py

		# CHANGES HERE HAVE NO EFFECT: ../VERSION is the source of truth
		__version__ = "3.1.0"
		__version__ = "2.0.0"

+1

-1

setup.py

		@@ -15,3 +15,3 @@ # -- coding: utf-8 --
		'name': 'replacy',
		'version': '3.6.1',
		'version': '2.1.0',
		'description': 'ReplaCy = spaCy Matcher + pyInflect. Create rules, correct sentences.',
		@@ -18,0 +18,0 @@ 'long_description': '<p align="center">\n<img src="./docs/replacy_logo.png" align="center" />\n</p>\n\n# replaCy: match & replace with spaCy\n\nWe found that in multiple projects we had duplicate code for using spaCy’s blazing fast matcher to do the same thing: Match-Replace-Grammaticalize. So we wrote replaCy!\n\n- Match - spaCy’s matcher is great, and lets you match on text, shape, POS, dependency parse, and other features. We extended this with “match hooks”, predicates that get used in the callback function to further refine a match.\n- Replace - Not built into spaCy’s matcher syntax, but easily added. You often want to replace a matched word with some other term.\n- Grammaticalize - If you match on ”LEMMA”: “dance”, and replace with suggestions: ["sing"], but the actual match is danced, you need to conjugate “sing” appropriately. This is the “killer feature” of replaCy\n\n[![spaCy](https://img.shields.io/badge/made%20with%20❤%20and-spaCy-09a3d5.svg)](https://spacy.io)\n[![pypi Version](https://img.shields.io/pypi/v/replacy.svg?style=flat-square&logo=pypi&logoColor=white)](https://pypi.org/project/replacy/)\n[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/ambv/black)\n\n<p align="center">\n<img src="./docs/replacy_ex.png" align="center" />\n</p>\n\n\n## Requirements\n\n- `spacy >= 2.0` (not installed by default, but replaCy needs to be instantiated with an `nlp` object)\n\n## Installation\n\n`pip install replacy`\n\n## Quick start\n\n```python\nfrom replacy import ReplaceMatcher\nfrom replacy.db import load_json\nimport spacy\n\n\nmatch_dict = load_json(\'/path/to/your/match/dict.json\')\n# load nlp spacy model of your choice\nnlp = spacy.load("en_core_web_sm")\n\nrmatcher = ReplaceMatcher(nlp, match_dict=match_dict)\n\n# get inflected suggestions\n# look up the first suggestion\nspan = rmatcher("She extracts revenge.")[0]\nspan._.suggestions\n# >>> 
[\'exacts\']\n```\n\n## Input\n\nReplaceMatcher accepts both text and spaCy doc.\n\n```python\n# text is ok\nspan = r_matcher("She extracts revenge.")[0]\n\n# doc is ok too\ndoc = nlp("She extracts revenge.")\nspan = r_matcher(doc)[0]\n```\n\n## match_dict.json format\n\nHere is a minimal `match_dict.json`:\n\n```json\n{\n "extract-revenge": {\n "patterns": [\n {\n "LEMMA": "extract",\n "TEMPLATE_ID": 1\n }\n ],\n "suggestions": [\n [\n {\n "TEXT": "exact",\n "FROM_TEMPLATE_ID": 1\n }\n ]\n ],\n "match_hook": [\n {\n "name": "succeeded_by_phrase",\n "args": "revenge",\n "match_if_predicate_is": true\n }\n ],\n "test": {\n "positive": [\n "And at the same time extract revenge on those he so despises?",\n "Watch as Tampa Bay extracts revenge against his former Los Angeles Rams team."\n ],\n "negative": ["Mother flavours her custards with lemon extract."]\n }\n }\n}\n```\nFor more information how to compose `match_dict` see our [wiki](https://github.com/Qordobacode/replaCy/wiki/match_dict.json-format): \n\n\n# Citing\n\nIf you use replaCy in your research, please cite with the following BibText\n\n```bibtext\n@misc{havens2019replacy,\n title = {SpaCy match and replace, maintaining conjugation},\n author = {Sam Havens, Aneta Stal, and Manhal Daaboul},\n url = {https://github.com/Qordobacode/replaCy},\n year = {2019}\n}\n',

replacy - PyPI Package Compare versions

Improved metrics