Socket
Socket
Sign inDemoInstall

pelias-schema

Package Overview
Dependencies
Maintainers
5
Versions
109
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

pelias-schema - npm Package Compare versions

Comparing version 5.5.1 to 5.6.0

synonyms/directionals/de.txt

4

integration/analyzer_peliasPhrase.js

@@ -81,3 +81,3 @@ // validate analyzer is behaving as expected

// both terms should map to same tokens
var expected3 = [ '0:13509', '1:colfax', '2:ave', '2:avenue', '2:av', '3:s', '3:south', '3:see' ];
var expected3 = [ '0:13509', '1:colfax', '2:ave', '2:avenue', '2:av', '3:s', '3:south' ];
var expected4 = [ '0:13509', '1:colfax', '2:avenue', '2:ave', '2:av', '3:south', '3:s' ];

@@ -88,3 +88,3 @@ assertAnalysis( 'address', '13509 Colfax Ave S', expected3 );

// both terms should map to same tokens
var expected5 = [ '0:100', '1:s', '1:south', '1:see', '2:lake', '2:lk', '3:dr', '3:drive' ];
var expected5 = [ '0:100', '1:s', '1:south', '2:lake', '2:lk', '3:dr', '3:drive' ];
var expected6 = [ '0:100', '1:south', '1:s', '2:lake', '2:lk', '3:drive', '3:dr' ];

@@ -91,0 +91,0 @@ assertAnalysis( 'address', '100 S Lake Dr', expected5 );

// validate analyzer is behaving as expected
const elastictest = require('elastictest')
var tape = require('tape'),
elastictest = require('elastictest'),
schema = require('../schema'),
punctuation = require('../punctuation');
module.exports.tests = {};

@@ -9,0 +5,0 @@

@@ -60,2 +60,8 @@ const _ = require('lodash');

},
// the 'analyze' assertion indexes $text using the analyzer specified
// in the $analyzer var and then checks that all of the tokens in
// $expected are contained within the index.
// note: previously it asserted that $expected was deeply equal to the
// tokens in the index; now it only asserts that every token in $expected is
// present in the index, which may contain additional tokens not specified in $expected.
analyze: (suite, t, analyzer, comment, text, expected) => {

@@ -71,3 +77,6 @@ suite.assert(done => {

if (err) { console.error(err); }
t.deepEqual(common.bucketTokens(res.tokens), common.bucketTokens(expected), comment);
t.deepEqual({}, removeIndexTokensFromExpectedTokens(
common.bucketTokens(res.tokens),
common.bucketTokens(expected)
), comment);
done();

@@ -79,2 +88,12 @@ });

/**
 * Compute which expected tokens are missing from the index.
 *
 * Both arguments map a position key to an array of tokens. For every position
 * present in `index`, the tokens found at that position are removed from the
 * expected tokens of the same position; positions left with no tokens are
 * dropped entirely. Positions of `expected` that are absent from `index`, or
 * whose value is not an array, are carried over untouched.
 *
 * The caller's `expected` object is not modified: the work is done on a
 * shallow copy, so the same expectation can safely be reused afterwards.
 *
 * @param {Object} index - tokens found in the index, keyed by position
 * @param {Object} expected - tokens that should be present, keyed by position
 * @returns {Object} expected tokens not found in the index (empty when all matched)
 */
function removeIndexTokensFromExpectedTokens(index, expected){
  const missing = Object.assign({}, expected);
  for (const pos of Object.keys(index)) {
    const wanted = missing[pos];
    if (!Array.isArray(wanted)) { continue; }
    const unmatched = wanted.filter(token => !index[pos].includes(token));
    if (unmatched.length === 0) {
      // every expected token at this position was found in the index
      delete missing[pos];
    } else {
      missing[pos] = unmatched;
    }
  }
  return missing;
}
var tests = [

@@ -81,0 +100,0 @@ require('./validate.js'),

{
"name": "pelias-schema",
"version": "5.5.1",
"version": "5.6.0",
"author": "pelias",

@@ -36,2 +36,3 @@ "description": "Elasticsearch schema files and tooling for Pelias",

"elasticsearch": "^16.0.0",
"glob": "^7.1.6",
"lodash": "^4.17.15",

@@ -38,0 +39,0 @@ "pelias-config": "^4.5.0",

const _ = require('lodash');
const fs = require('fs');
const path = require('path');
const peliasConfig = require('pelias-config');
const punctuation = require('./punctuation');
const synonymParser = require('./synonyms/parser');
const synonymLinter = require('./synonyms/linter');
const synonyms = require('./synonyms/loader').load();
// load synonyms from disk
const synonyms = fs.readdirSync(path.join(__dirname, 'synonyms'))
.sort()
.filter( f => f.match(/\.txt$/) )
.reduce(( acc, cur ) => {
acc[cur.replace('.txt', '')] = synonymParser(
path.join(__dirname, 'synonyms', cur)
);
return acc;
}, {});
// emit synonym warnings
synonymLinter(synonyms);
require('./configValidation').validate(peliasConfig.generate());

@@ -53,5 +36,5 @@

"lowercase",
"trim",
"admin_synonyms_multiplexer",
"icu_folding",
"trim",
"custom_admin",
"word_delimiter",

@@ -69,8 +52,5 @@ "unique_only_same_position",

"lowercase",
"trim",
"name_synonyms_multiplexer",
"icu_folding",
"trim",
"custom_name",
"street_suffix",
"directionals",
"ampersand",
"remove_ordinals",

@@ -89,5 +69,5 @@ "removeAllZeroNumericPrefix",

"filter": [
"icu_folding",
"lowercase",
"trim",
"icu_folding",
"remove_ordinals",

@@ -107,6 +87,3 @@ "removeAllZeroNumericPrefix",

"remove_duplicate_spaces",
"ampersand",
"custom_name",
"street_suffix",
"directionals",
"name_synonyms_multiplexer",
"icu_folding",

@@ -122,7 +99,7 @@ "remove_ordinals",

"tokenizer":"keyword",
"char_filter" : ["alphanumeric"],
"char_filter": ["alphanumeric", "nfkc_normalizer"],
"filter": [
"lowercase",
"trim",
"icu_folding",
"trim",
"unique_only_same_position",

@@ -135,7 +112,7 @@ "notnull"

"tokenizer":"keyword",
"char_filter" : ["alphanumeric"],
"char_filter": ["alphanumeric", "nfkc_normalizer"],
"filter": [
"lowercase",
"trim",
"icu_folding",
"trim",
"unique_only_same_position",

@@ -158,5 +135,3 @@ "notnull"

"remove_duplicate_spaces",
"custom_street",
"street_suffix",
"directionals",
"street_synonyms_multiplexer",
"icu_folding",

@@ -172,2 +147,33 @@ "remove_ordinals",

"filter" : {
"street_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_street",
"synonyms/personal_titles",
"synonyms/streets",
"synonyms/directionals"
]
},
"name_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_name",
"synonyms/personal_titles",
"synonyms/place_names",
"synonyms/streets",
"synonyms/directionals",
"synonyms/punctuation"
]
},
"admin_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_admin",
"synonyms/personal_titles",
"synonyms/place_names"
]
},
"notnull" :{

@@ -230,9 +236,10 @@ "type" : "length",

// dynamically create filters for all synonym files in the ./synonyms directory.
// each filter is given the same name as the file, minus the extension.
_.each(synonyms, (synonym, key) => {
settings.analysis.filter[key] = {
// each filter is given the same name as the file, path separators are replaced with
// underscores and the file extension is removed.
_.each(synonyms, (synonym, name) => {
settings.analysis.filter[`synonyms/${name}`] = {
"type": "synonym",
"synonyms": !_.isEmpty(synonym) ? synonym : ['']
};
})
});

@@ -239,0 +246,0 @@ // Merge settings from pelias/config

@@ -26,7 +26,1 @@ # =============================================================================

# =============================================================================
saint,st
sainte,ste
fort,ft
mount,mt
mont,mt

@@ -26,150 +26,1 @@ # =============================================================================

# =============================================================================
# English
brothers,bros
cape,cpe,cp
city,cty
creek,cr,crk
county,co,cty
downs,downes,dwns
flats,flts
forest,frst,fst
fort,ft
fords,frds
fork,frk
forks,frks
forge,frg
forges,frgs
glens,glns
great,grt,gt
greater,grtr,gtr
greens,grns
groves,grvs
heights,hghts,hgts,hieghts,ht,hts,hgths
international,intl
lake,lk
lakes,lks
little,ltl,lttl,littl,litl
lock,lck
locks,lcks
lower,low,lwr,lr
medical,med
memorial,mem
middle,mid,midl
military,mil
mount,mt,mnt
mountain,mtn
mountains,mtns
municipal,mun,mpal
national,natl
neck,nck
orchard,orch
paradise,pde,pdse
port,pt,prt
park,pk,prk
rear of,r / o,r o
river,riv,rvr,rivr
slope,slpe,slp
springs,spgs,sprngs
stream,strm,stm
triangle,tri
upper,up,upr,uppr
village,vlg,vlge,vilg,vilge
ville,vl
villages,vlgs
wood,wd
woods,wds
# French
baston,bast
bourg,brg
charmille,chi
colline,coli
collines,colis
enceinte,en
fleuve,fl
grand,gd,gr,g
mont,mt,mnt
petite,p,pt
porche,pch
rivière,riviere,riv
village,vge
villages,vges
# German
deutsch,dt
ehemalige,ehem
gebruder,gebr
haltestelle,hst
hinter,hint,ht
internationale,int
kleine,kl
kleiner,kl
kleines,kl
kogel,kg
niedere,nd
rhein,rh
see,s
spitze,sp
vordere,vd,vord
wiese,ws
# Spanish
abril,abr,abl
agosto,ag,agto,agt
altura,alt
alturas,alts
arboleda,arb
arrabal,arral
bosque,bsq
brigada,brig
cabo,cbo
campo,cpo,cmpo
campos,cpos,cmpos
canal,cnl
centro,cntro,ctro
cerro,crro
corral,crral
corralillo,crrlo
diseminado,disem
enero,en,eno,ene,en o
diciembre,dic,dicbre,dice,dbre,10bre,10 bre,xbre,x bre
febrero,febo,febro,febr,feb
gobierno,gob,gobno
grande,gr
guerra,ga
independencia,indep
infantería,infanteria,infa,ynfa,ynfanta
jardín,jdin,jard,jardin
jardínes,jdins,jards,jardines
junio,jun,jn
julio,jul,jl
lago,lg
lagos,lgs
laguna,lgna
llanura,llnra
llanuras,llnras
marzo,mzo,mar
mayo,my,may
militar,milr
monte,mt,mte,mnte
montes,mts,mtes,mntes,mnts
nacional,nal,nacl
noviembre,nbre,nvre,nove,novre,novbre,9bre,9 bre
octubre,oct,octbre,octe,8bre,8 bre
portillo,ptilo,ptllo
prado,prdo
primeros,pros
privada,priv
punta,pnta
quebrada,qbda
real,rl
republica,rep
revolucion,rev
ribera,ribr
río,rio
septiembre,setbre,sepe,sepbre,7bre,7 re,7re,7 bre,sep,set
sierra,srra
valle,vlle
volcan,vlcn
voluntarios,voluntos

@@ -43,3 +43,4 @@ const _ = require('lodash');

tokensSanityCheck(line, logprefix, tokens);
// multiWordCheck(line, logprefix, tokens);
multiWordCheck(line, logprefix, tokens);
// tokenLengthCheck(line, logprefix, tokens);
})

@@ -69,6 +70,6 @@ })

function multiWordCheck(line, tokens) {
function multiWordCheck(line, logprefix, tokens) {
_.each(tokens, token => {
if (/\s/.test(token)){
logger.warn(`multi word synonyms may cause issues with phrase queries:`, token);
logger.warn(`${logprefix} multi word synonyms may cause issues with phrase queries:`, token);
}

@@ -78,2 +79,10 @@ });

/**
 * Warn about every token whose length is one character or less.
 *
 * @param {string} line - the synonym line being linted (not used by this check)
 * @param {string} logprefix - text placed at the start of each warning
 * @param {Array} tokens - the tokens to inspect
 */
function tokenLengthCheck(line, logprefix, tokens) {
  _.forEach(tokens, (candidate) => {
    const isShort = candidate.length <= 1;
    if (!isShort) { return; }
    logger.warn(`${logprefix} short token:`, candidate);
  });
}
module.exports = linter

@@ -1,2 +0,2 @@

var fs = require('fs');
const fs = require('fs');

@@ -3,0 +3,0 @@ // https://www.elastic.co/guide/en/elasticsearch/reference/2.4/analysis-synonym-tokenfilter.html

@@ -53,2 +53,19 @@ var path = require('path'),

// this multiplexer filter provides all the synonyms used by the peliasAdmin analyzer
// note: the multiplexer ensures that we do not virally generate synonyms of synonyms.
// note: this test is registered under its own key. it was previously assigned to
// 'nameSynonymsMultiplexerFilter', the same key used by the name multiplexer test
// further down this file, so the later assignment replaced this function.
module.exports.tests.adminSynonymsMultiplexerFilter = function (test, common) {
  test('has admin_synonyms_multiplexer filter', function (t) {
    const s = settings();
    t.equal(typeof s.analysis.filter.admin_synonyms_multiplexer, 'object', 'there is a admin_synonyms_multiplexer filter');
    const filter = s.analysis.filter.admin_synonyms_multiplexer;
    t.equal(filter.type, 'multiplexer');
    // the order of the synonym filters is part of the assertion
    t.deepEqual(filter.filters, [
      'synonyms/custom_admin',
      'synonyms/personal_titles',
      'synonyms/place_names'
    ]);
    t.end();
  });
};
module.exports.tests.peliasAdminAnalyzer = function(test, common) {

@@ -61,7 +78,42 @@ test('has pelias admin analyzer', function(t) {

t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
t.deepEqual(analyzer.char_filter, ['punctuation', 'nfkc_normalizer'], 'character filters specified');
t.true(Array.isArray(analyzer.filter), 'filters specified');
t.end();
});
test('peliasAdmin token filters', function (t) {
var analyzer = settings().analysis.analyzer.peliasAdmin;
t.deepEqual(analyzer.filter, [
"lowercase",
"trim",
"admin_synonyms_multiplexer",
"icu_folding",
"word_delimiter",
"unique_only_same_position",
"notnull",
"flatten_graph"
]);
t.end();
});
};
// the name_synonyms_multiplexer filter supplies all the synonyms used by the
// peliasPhrase and peliasIndexOneEdgeGram analyzers.
// note: going through a multiplexer ensures that synonyms of synonyms are not virally generated.
module.exports.tests.nameSynonymsMultiplexerFilter = function (test, common) {
  test('has name_synonyms_multiplexer filter', (t) => {
    const filters = settings().analysis.filter;
    t.equal(typeof filters.name_synonyms_multiplexer, 'object', 'there is a name_synonyms_multiplexer filter');

    const multiplexer = filters.name_synonyms_multiplexer;
    const expectedFilters = [
      'synonyms/custom_name',
      'synonyms/personal_titles',
      'synonyms/place_names',
      'synonyms/streets',
      'synonyms/directionals',
      'synonyms/punctuation'
    ];
    t.equal(multiplexer.type, 'multiplexer');
    t.deepEqual(multiplexer.filters, expectedFilters);
    t.end();
  });
};
module.exports.tests.peliasIndexOneEdgeGramAnalyzer = function(test, common) {

@@ -82,8 +134,5 @@ test('has peliasIndexOneEdgeGram analyzer', function(t) {

"lowercase",
"trim",
"name_synonyms_multiplexer",
"icu_folding",
"trim",
"custom_name",
"street_suffix",
"directionals",
"ampersand",
"remove_ordinals",

@@ -114,5 +163,5 @@ "removeAllZeroNumericPrefix",

t.deepEqual(analyzer.filter, [
'icu_folding',
'lowercase',
'trim',
'icu_folding',
'remove_ordinals',

@@ -144,6 +193,3 @@ 'removeAllZeroNumericPrefix',

"remove_duplicate_spaces",
"ampersand",
"custom_name",
"street_suffix",
"directionals",
"name_synonyms_multiplexer",
"icu_folding",

@@ -166,3 +212,3 @@ "remove_ordinals",

t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
t.deepEqual(analyzer.char_filter, ["alphanumeric"], 'alphanumeric filter specified');
t.deepEqual(analyzer.char_filter, ['alphanumeric', 'nfkc_normalizer'], 'alphanumeric filter specified');
t.true(Array.isArray(analyzer.filter), 'filters specified');

@@ -175,4 +221,4 @@ t.end();

"lowercase",
"trim",
"icu_folding",
"trim",
"unique_only_same_position",

@@ -192,3 +238,3 @@ "notnull"

t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
t.deepEqual(analyzer.char_filter, ["alphanumeric"], 'alphanumeric filter specified');
t.deepEqual(analyzer.char_filter, ['alphanumeric', 'nfkc_normalizer'], 'alphanumeric filter specified');
t.true(Array.isArray(analyzer.filter), 'filters specified');

@@ -201,4 +247,4 @@ t.end();

"lowercase",
"trim",
"icu_folding",
"trim",
"unique_only_same_position",

@@ -224,2 +270,20 @@ "notnull"

// the street_synonyms_multiplexer filter supplies all the synonyms used by the peliasStreet analyzer.
// note: going through a multiplexer ensures that synonyms of synonyms are not virally generated.
module.exports.tests.streetSynonymsMultiplexerFilter = function (test, common) {
  test('has street_synonyms_multiplexer filter', (t) => {
    const filters = settings().analysis.filter;
    t.equal(typeof filters.street_synonyms_multiplexer, 'object', 'there is a street_synonyms_multiplexer filter');

    const multiplexer = filters.street_synonyms_multiplexer;
    const expectedFilters = [
      'synonyms/custom_street',
      'synonyms/personal_titles',
      'synonyms/streets',
      'synonyms/directionals'
    ];
    t.equal(multiplexer.type, 'multiplexer');
    t.deepEqual(multiplexer.filters, expectedFilters);
    t.end();
  });
};
module.exports.tests.peliasStreetAnalyzer = function(test, common) {

@@ -232,3 +296,3 @@ test('has peliasStreet analyzer', function(t) {

t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
t.deepEqual(analyzer.char_filter, ["punctuation","nfkc_normalizer"], 'character filters specified');
t.deepEqual(analyzer.char_filter, ['punctuation', 'nfkc_normalizer'], 'character filters specified');
t.true(Array.isArray(analyzer.filter), 'filters specified');

@@ -243,5 +307,3 @@ t.end();

"remove_duplicate_spaces",
"custom_street",
"street_suffix",
"directionals",
"street_synonyms_multiplexer",
"icu_folding",

@@ -328,7 +390,7 @@ "remove_ordinals",

// we convert and->& rather than &->and to save memory/disk
module.exports.tests.ampersandFilter = function(test, common) {
test('has ampersand filter', function(t) {
module.exports.tests.punctuationFilter = function(test, common) {
test('has punctuation filter', function(t) {
var s = settings();
t.equal(typeof s.analysis.filter.ampersand, 'object', 'there is a ampersand filter');
var filter = s.analysis.filter.ampersand;
t.equal(typeof s.analysis.filter['synonyms/punctuation'], 'object', 'there is a punctuation filter');
var filter = s.analysis.filter['synonyms/punctuation'];
t.equal(filter.type, 'synonym');

@@ -382,12 +444,12 @@ t.deepEqual(filter.synonyms, [

// this filter stems common street suffixes
// eg. road=>rd and street=>st
// this filter provides synonyms for street suffixes
// eg. road=>rd
module.exports.tests.streetSynonymFilter = function(test, common) {
test('has street_suffix filter', function(t) {
test('has synonyms/streets filter', function(t) {
var s = settings();
t.equal(typeof s.analysis.filter.street_suffix, 'object', 'there is an street_suffix filter');
var filter = s.analysis.filter.street_suffix;
t.equal(typeof s.analysis.filter['synonyms/streets'], 'object', 'there is a synonyms/streets filter');
var filter = s.analysis.filter['synonyms/streets'];
t.equal(filter.type, 'synonym');
t.true(Array.isArray(filter.synonyms));
t.equal(filter.synonyms.length, 127);
t.equal(filter.synonyms.length, 809);
t.end();

@@ -399,10 +461,10 @@ });

// eg. north=>n and south=>s
module.exports.tests.directionSynonymFilter = function(test, common) {
module.exports.tests.directionalSynonymFilter = function(test, common) {
test('has directionals filter', function(t) {
var s = settings();
t.equal(typeof s.analysis.filter.directionals, 'object', 'there is an directionals filter');
var filter = s.analysis.filter.directionals;
t.equal(typeof s.analysis.filter['synonyms/directionals'], 'object', 'there is a synonyms/directionals filter');
var filter = s.analysis.filter['synonyms/directionals'];
t.equal(filter.type, 'synonym');
t.true(Array.isArray(filter.synonyms));
t.equal(filter.synonyms.length, 8);
t.equal(filter.synonyms.length, 69);
t.end();

@@ -412,2 +474,30 @@ });

// the synonyms/personal_titles filter holds common synonyms for personal titles
// eg. doctor=>dr
module.exports.tests.personalTitleSynonymFilter = function (test, common) {
  test('has personal_titles filter', (t) => {
    const filters = settings().analysis.filter;
    t.equal(typeof filters['synonyms/personal_titles'], 'object', 'there is a synonyms/personal_titles filter');

    const personalTitles = filters['synonyms/personal_titles'];
    t.equal(personalTitles.type, 'synonym');
    t.true(Array.isArray(personalTitles.synonyms));
    // the test pins the exact number of synonym entries
    t.equal(personalTitles.synonyms.length, 191);
    t.end();
  });
};
// the synonyms/place_names filter holds common synonyms for place names
// eg. park=>pk
module.exports.tests.placeNameSynonymFilter = function (test, common) {
  test('has place_names filter', (t) => {
    const filters = settings().analysis.filter;
    t.equal(typeof filters['synonyms/place_names'], 'object', 'there is a synonyms/place_names filter');

    const placeNames = filters['synonyms/place_names'];
    t.equal(placeNames.type, 'synonym');
    t.true(Array.isArray(placeNames.synonyms));
    // the test pins the exact number of synonym entries
    t.equal(placeNames.synonyms.length, 314);
    t.end();
  });
};
// this filter removes number ordinals

@@ -414,0 +504,0 @@ // eg. 26th => 26, 1st => 1

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc