pelias-schema
Advanced tools
Comparing version 5.6.0 to 5.6.1
{ | ||
"name": "pelias-schema", | ||
"version": "5.6.0", | ||
"version": "5.6.1", | ||
"author": "pelias", | ||
@@ -5,0 +5,0 @@ "description": "Elasticsearch schema files and tooling for Pelias", |
@@ -37,2 +37,3 @@ const _ = require('lodash'); | ||
"trim", | ||
"synonyms/custom_admin/multiword", | ||
"admin_synonyms_multiplexer", | ||
@@ -53,2 +54,3 @@ "icu_folding", | ||
"trim", | ||
"synonyms/custom_name/multiword", | ||
"name_synonyms_multiplexer", | ||
@@ -86,2 +88,3 @@ "icu_folding", | ||
"remove_duplicate_spaces", | ||
"synonyms/custom_name/multiword", | ||
"name_synonyms_multiplexer", | ||
@@ -132,2 +135,3 @@ "icu_folding", | ||
"remove_duplicate_spaces", | ||
"synonyms/custom_street/multiword", | ||
"street_synonyms_multiplexer", | ||
@@ -234,7 +238,23 @@ "icu_folding", | ||
// underscores and the file extension is removed. | ||
_.each(synonyms, (synonym, name) => { | ||
// note: if no synonym entries are present in the list we use an array | ||
// containing an empty string to avoid elasticsearch schema parsing errors. | ||
_.each(synonyms, (entries, name) => { | ||
// same tokenizer regex as above except without comma | ||
// (which is a delimiter within the synonym files) | ||
const tokenizerRegex = new RegExp('[\\s/\\\\-]+'); | ||
const singleWordEntries = entries.filter(e => !tokenizerRegex.test(e)) | ||
const multiWordEntries = entries.filter(e => tokenizerRegex.test(e)) | ||
// generate a filter containing single-word synonyms | ||
settings.analysis.filter[`synonyms/${name}`] = { | ||
"type": "synonym", | ||
"synonyms": !_.isEmpty(synonym) ? synonym : [''] | ||
"synonyms": !_.isEmpty(singleWordEntries) ? singleWordEntries : [''] | ||
}; | ||
// generate a filter containing multi-word synonyms | ||
settings.analysis.filter[`synonyms/${name}/multiword`] = { | ||
"type": "synonym", | ||
"synonyms": !_.isEmpty(multiWordEntries) ? multiWordEntries : [''] | ||
}; | ||
}); | ||
@@ -241,0 +261,0 @@ |
@@ -5,2 +5,7 @@ const _ = require('lodash'); | ||
// same tokenizer regex as the schema | ||
const TOKENIZER_REGEX = new RegExp('[\\s,/\\\\-]+'); | ||
const DEMIMETER_REGEX = /,|=>/g | ||
const REPLACEMENT_REGEX = /=>/ | ||
/** | ||
@@ -26,3 +31,3 @@ * The synonyms linter attempts to warn the user when making | ||
// split the lines by delimiter | ||
let tokens = line.split(/,|=>/g).map(t => t.trim()); | ||
let tokens = line.split(DEMIMETER_REGEX).map(t => t.trim()); | ||
@@ -46,2 +51,3 @@ // strip blacklisted punctuation from synonyms | ||
multiWordCheck(line, logprefix, tokens); | ||
tokenReplacementCheck(line, logprefix); | ||
// tokenLengthCheck(line, logprefix, tokens); | ||
@@ -74,3 +80,3 @@ }) | ||
_.each(tokens, token => { | ||
if (/\s/.test(token)){ | ||
if (TOKENIZER_REGEX.test(token)){ | ||
logger.warn(`${logprefix} multi word synonyms may cause issues with phrase queries:`, token); | ||
@@ -81,2 +87,8 @@ } | ||
/**
 * Emit a warning when a synonyms line contains the '=>' replacement rule.
 *
 * @param line - text tested against REPLACEMENT_REGEX
 * @param logprefix - text placed at the start of the warning message
 */
function tokenReplacementCheck(line, logprefix) {
  const usesReplacementRule = REPLACEMENT_REGEX.test(line);

  // nothing to report for lines without a '=>' rule
  if (!usesReplacementRule) {
    return;
  }

  logger.warn(`${logprefix} synonym rule '=>' is not supported, use ',' instead`);
}
function tokenLengthCheck(line, logprefix, tokens) { | ||
@@ -83,0 +95,0 @@ _.each(tokens, token => { |
abbaye, abe | ||
auto-école, autoécole, autoecole | ||
autoécole, autoecole | ||
aéroport, aeroport | ||
@@ -4,0 +4,0 @@ bastide, bstd |
@@ -94,8 +94,8 @@ abbey, abby | ||
cross, cs, crss | ||
crossing, crsg, xing, csg, x-ing | ||
crossroad, crd, xroad, x-road, xrd, x-rd | ||
crossing, crsg, xing, csg | ||
crossroad, crd, xroad, xrd | ||
crossroads, xrds | ||
crossway, cowy, crwy, xway, xwy, x-way | ||
crossway, cowy, crwy, xway, xwy | ||
cruiseway, cuwy, crwy | ||
cul-de-sac, culdesac, cds, cusac, csac | ||
culdesac, cds, cusac, csac | ||
curve, cve, crv, crve, curv | ||
@@ -102,0 +102,0 @@ cutting, cttg, ctg, cutt |
@@ -86,2 +86,3 @@ var path = require('path'), | ||
"trim", | ||
"synonyms/custom_admin/multiword", | ||
"admin_synonyms_multiplexer", | ||
@@ -134,2 +135,3 @@ "icu_folding", | ||
"trim", | ||
"synonyms/custom_name/multiword", | ||
"name_synonyms_multiplexer", | ||
@@ -191,2 +193,3 @@ "icu_folding", | ||
"remove_duplicate_spaces", | ||
"synonyms/custom_name/multiword", | ||
"name_synonyms_multiplexer", | ||
@@ -299,2 +302,3 @@ "icu_folding", | ||
"remove_duplicate_spaces", | ||
"synonyms/custom_street/multiword", | ||
"street_synonyms_multiplexer", | ||
@@ -301,0 +305,0 @@ "icu_folding", |
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
293304
7004