@telefonica/language-model-converter
Advanced tools
Comparing version 2.1.0 to 2.1.1
import { Luis } from './luis-model'; | ||
export declare type culture = 'en-us' | 'es-es'; | ||
export declare class LanguageModelParser { | ||
private doc; | ||
parse(files: string[], culture: string): Luis.Model; | ||
culture: culture; | ||
parse(files: string[], culture: culture): Luis.Model; | ||
private expandVariables(sentence, variables); | ||
@@ -9,3 +11,4 @@ private extractEntities(sentence); | ||
private normalizeSentence(sentence); | ||
private static wordCount(sentence); | ||
private buildUtterance(sentence, intent); | ||
} |
@@ -18,2 +18,3 @@ "use strict"; | ||
} | ||
this.culture = culture; | ||
let luisModel = { | ||
@@ -118,12 +119,28 @@ luis_schema_version: '1.3.0', | ||
normalizeSentence(sentence) { | ||
let normalizedSentence = sentence.replace(/[^\w\u00C0-\u017F|_|\.]/g, capture => ' ' + capture + ' '); | ||
normalizedSentence = normalizedSentence.replace(' º ', 'º'); | ||
normalizedSentence = normalizedSentence.replace(' ª ', 'ª'); | ||
normalizedSentence = normalizedSentence.replace(/\s\s+/g, ' '); | ||
normalizedSentence = normalizedSentence.trim(); | ||
return normalizedSentence; | ||
let normalized = sentence | ||
.replace(/\s\s+/g, ' ') | ||
.trim(); | ||
switch (this.culture) { | ||
case 'en-us': | ||
normalized = normalized.replace(/[A-Z]/g, capture => capture.toLowerCase()); | ||
break; | ||
default: | ||
normalized = normalized.toLocaleLowerCase(); | ||
} | ||
return normalized; | ||
} | ||
static wordCount(sentence) { | ||
return sentence | ||
.replace(/[^\w\u00C0-\u017F]/g, capture => ` ${capture} `) | ||
.replace(/_/g, capture => ` ${capture} `) | ||
.replace(' º ', 'º') | ||
.replace(' ª ', 'ª') | ||
.replace(/\s\s+/g, ' ') | ||
.trim() | ||
.split(' ') | ||
.length; | ||
} | ||
buildUtterance(sentence, intent) { | ||
let entities = []; | ||
let parts = []; | ||
let parts = ''; | ||
sentence | ||
@@ -136,6 +153,5 @@ .trim() | ||
extractedEntities.forEach(entity => { | ||
let startPos = parts.length; | ||
let newWords = this.normalizeSentence(entity.entityValue).split(' '); | ||
parts = parts.concat(newWords); | ||
let endPos = parts.length - 1; | ||
let startPos = LanguageModelParser.wordCount(parts); | ||
parts += entity.entityValue; | ||
let endPos = LanguageModelParser.wordCount(parts) - 1; | ||
entities.push({ | ||
@@ -149,8 +165,7 @@ entity: entity.entityType, | ||
else { | ||
let newWords = this.normalizeSentence(part).split(' '); | ||
parts = parts.concat(newWords); | ||
parts += part; | ||
} | ||
}); | ||
let utterance = { | ||
text: parts.join(' ').trim(), | ||
text: this.normalizeSentence(parts), | ||
intent, | ||
@@ -157,0 +172,0 @@ entities: entities |
@@ -7,3 +7,3 @@ "use strict"; | ||
let parser = new parser_1.LanguageModelParser(); | ||
let luisModel = parser.parse(['./test/fixtures/en-basic.yaml'], 'en-basic'); | ||
let luisModel = parser.parse(['./test/fixtures/en-basic.yaml'], 'en-us'); | ||
let expectedIntents = [ | ||
@@ -33,3 +33,3 @@ { | ||
{ | ||
text: 'This is a test ( utterance ) . Entities London and Madrid and Spain', | ||
text: 'this is a test (utterance). entities london and madrid and spain', | ||
intent: 'my.test.intent.1', | ||
@@ -55,3 +55,3 @@ entities: [ | ||
{ | ||
text: 'This is a test utterance 1', | ||
text: 'this is a test utterance 1', | ||
intent: 'my.test.intent.2', | ||
@@ -61,3 +61,3 @@ entities: [] | ||
{ | ||
text: 'This is a test utterance 2', | ||
text: 'this is a test utterance 2', | ||
intent: 'my.test.intent.2', | ||
@@ -71,14 +71,14 @@ entities: [] | ||
let parser = new parser_1.LanguageModelParser(); | ||
let luisModel = parser.parse(['./test/fixtures/en-basic.yaml'], 'en-basic'); | ||
let luisModel = parser.parse(['./test/fixtures/en-basic.yaml'], 'en-us'); | ||
chai_1.expect(luisModel.luis_schema_version).to.not.be.empty; | ||
chai_1.expect(luisModel.name).to.not.be.empty; | ||
chai_1.expect(luisModel.desc).to.not.be.empty; | ||
chai_1.expect(luisModel.culture).to.eq('en-basic'); | ||
chai_1.expect(luisModel.culture).to.eq('en-us'); | ||
}); | ||
it('should parse a valid yaml file with corner cases', () => { | ||
let parser = new parser_1.LanguageModelParser(); | ||
let luisModel = parser.parse(['./test/fixtures/en-cornercases.yaml'], 'en-cornercases'); | ||
let luisModel = parser.parse(['./test/fixtures/en-cornercases.yaml'], 'en-us'); | ||
let expectedUtterances = [ | ||
{ | ||
'text': 'Santiago went to the Santiago Bernabeu .', | ||
'text': 'santiago went to the santiago bernabeu.', | ||
'intent': 'my.test.intent.1', | ||
@@ -94,3 +94,3 @@ 'entities': [ | ||
{ | ||
'text': 'I live in the 2ºC apartment in Madrid .', | ||
'text': 'i live in the 2ºc apartment in madrid.', | ||
'intent': 'my.test.intent.1', | ||
@@ -104,2 +104,105 @@ 'entities': [ | ||
] | ||
}, | ||
{ | ||
'text': `i don't like paris`, | ||
'intent': 'my.test.intent.1', | ||
'entities': [ | ||
{ | ||
'entity': 'location', | ||
'startPos': 5, | ||
'endPos': 5 | ||
} | ||
] | ||
}, | ||
{ | ||
'text': `i've 123 friends. in paris`, | ||
'intent': 'my.test.intent.1', | ||
'entities': [ | ||
{ | ||
'entity': 'location', | ||
'startPos': 7, | ||
'endPos': 7 | ||
} | ||
] | ||
}, | ||
{ | ||
'text': `i'd like to go to o'brian and l.a.`, | ||
'intent': 'my.test.intent.1', | ||
'entities': [ | ||
{ | ||
'entity': 'location', | ||
'startPos': 7, | ||
'endPos': 9 | ||
}, | ||
{ | ||
'entity': 'location', | ||
'startPos': 11, | ||
'endPos': 14 | ||
} | ||
] | ||
}, | ||
{ | ||
'text': 'i _love_ paris and great_britain.', | ||
'intent': 'my.test.intent.1', | ||
'entities': [ | ||
{ | ||
'entity': 'location', | ||
'startPos': 4, | ||
'endPos': 4 | ||
}, | ||
{ | ||
'entity': 'location', | ||
'startPos': 6, | ||
'endPos': 8 | ||
} | ||
] | ||
}, | ||
{ | ||
'text': 'the best city in-the-world is new-york', | ||
'intent': 'my.test.intent.1', | ||
'entities': [ | ||
{ | ||
'entity': 'location', | ||
'startPos': 9, | ||
'endPos': 11 | ||
} | ||
] | ||
}, | ||
{ | ||
'text': 'i love open spaces , like the one in bei jing and london', | ||
'intent': 'my.test.intent.1', | ||
'entities': [ | ||
{ | ||
'entity': 'location', | ||
'startPos': 9, | ||
'endPos': 10 | ||
}, | ||
{ | ||
'entity': 'location', | ||
'startPos': 12, | ||
'endPos': 12 | ||
} | ||
] | ||
}, | ||
{ | ||
'text': `symbols everywhere! ºª\\!|"@·#$%&¬/()=?¿'¡^\`<>,;.:-_¨*+ london`, | ||
'intent': 'my.test.intent.1', | ||
'entities': [ | ||
{ | ||
'entity': 'location', | ||
'startPos': 36, | ||
'endPos': 36 | ||
} | ||
] | ||
}, | ||
{ | ||
'text': 'Çç vayÁ cÓn Úna eÑes en Éspaña no son gÜenas nÍ cigüenas en parÍs', | ||
'intent': 'my.test.intent.1', | ||
'entities': [ | ||
{ | ||
'entity': 'location', | ||
'startPos': 13, | ||
'endPos': 13 | ||
} | ||
] | ||
} | ||
@@ -109,8 +212,26 @@ ]; | ||
}); | ||
it('should parse a valid yaml file with variables (phrase lists)', () => { | ||
it('should deal with locale specifities', function () { | ||
let parser = new parser_1.LanguageModelParser(); | ||
let luisModel = parser.parse(['./test/fixtures/en-variables.yaml'], 'en-variables'); | ||
let luisModel = parser.parse(['./test/fixtures/es-cornercases.yaml'], 'es-es'); | ||
let expectedUtterances = [ | ||
{ | ||
'text': 'This is the country Spain', | ||
'text': 'çç vayá cón úna eñes en éspaña no son güenas ní cigüenas en parís', | ||
'intent': 'my.test.intent.1', | ||
'entities': [ | ||
{ | ||
'entity': 'location', | ||
'startPos': 13, | ||
'endPos': 13 | ||
} | ||
] | ||
} | ||
]; | ||
chai_1.expect(luisModel.utterances).to.eql(expectedUtterances); | ||
}); | ||
it('should parse a valid yaml file with variables (lists)', () => { | ||
let parser = new parser_1.LanguageModelParser(); | ||
let luisModel = parser.parse(['./test/fixtures/en-variables.yaml'], 'en-us'); | ||
let expectedUtterances = [ | ||
{ | ||
'text': 'this is the country spain', | ||
'intent': 'my.test.intent', | ||
@@ -126,3 +247,3 @@ 'entities': [ | ||
{ | ||
'text': 'This is the country France', | ||
'text': 'this is the country france', | ||
'intent': 'my.test.intent', | ||
@@ -138,3 +259,3 @@ 'entities': [ | ||
{ | ||
'text': 'This is the country Spain', | ||
'text': 'this is the country spain', | ||
'intent': 'my.test.expansion', | ||
@@ -144,3 +265,3 @@ 'entities': [] | ||
{ | ||
'text': 'This is the country France', | ||
'text': 'this is the country france', | ||
'intent': 'my.test.expansion', | ||
@@ -150,3 +271,3 @@ 'entities': [] | ||
{ | ||
'text': 'This is the color Red', | ||
'text': 'this is the color red', | ||
'intent': 'my.test.expansion', | ||
@@ -156,3 +277,3 @@ 'entities': [] | ||
{ | ||
'text': 'This is the color Blue', | ||
'text': 'this is the color blue', | ||
'intent': 'my.test.expansion', | ||
@@ -162,3 +283,3 @@ 'entities': [] | ||
{ | ||
'text': 'This is the Red Spain', | ||
'text': 'this is the red spain', | ||
'intent': 'my.test.expansion', | ||
@@ -168,3 +289,3 @@ 'entities': [] | ||
{ | ||
'text': 'This is the Blue Spain', | ||
'text': 'this is the blue spain', | ||
'intent': 'my.test.expansion', | ||
@@ -174,3 +295,3 @@ 'entities': [] | ||
{ | ||
'text': 'This is the Red France', | ||
'text': 'this is the red france', | ||
'intent': 'my.test.expansion', | ||
@@ -180,5 +301,25 @@ 'entities': [] | ||
{ | ||
'text': 'This is the Blue France', | ||
'text': 'this is the blue france', | ||
'intent': 'my.test.expansion', | ||
'entities': [] | ||
}, | ||
{ | ||
'text': 'this is the spain spain', | ||
'intent': 'my.test.expansion', | ||
'entities': [] | ||
}, | ||
{ | ||
'text': 'this is the spain france', | ||
'intent': 'my.test.expansion', | ||
'entities': [] | ||
}, | ||
{ | ||
'text': 'this is the france spain', | ||
'intent': 'my.test.expansion', | ||
'entities': [] | ||
}, | ||
{ | ||
'text': 'this is the france france', | ||
'intent': 'my.test.expansion', | ||
'entities': [] | ||
} | ||
@@ -185,0 +326,0 @@ ]; |
{ | ||
"name": "@telefonica/language-model-converter", | ||
"version": "2.1.0", | ||
"version": "2.1.1", | ||
"description": "Language model converter for yot-bot", | ||
@@ -5,0 +5,0 @@ "license": "UNLICENSED", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
36920
569