@microsoft/bf-lu
Advanced tools
Comparing version 4.10.0-dev.20200804.10e144c to 4.10.0-dev.20200806.789391b
@@ -19,2 +19,3 @@ "use strict"; | ||
const LUISObjNameEnum = require('./../utils/enums/luisobjenum'); | ||
const fetch = require('node-fetch'); | ||
module.exports = { | ||
@@ -354,3 +355,3 @@ /** | ||
}; | ||
const buildLuJsonObject = async function (luObjArray, log, luis_culture, luSearchFn = findLuFilesInDir) { | ||
const buildLuJsonObject = async function (luObjArray, log, luis_culture, luSearchFn = resolveLuContent) { | ||
let allParsedLUISContent = []; | ||
@@ -399,34 +400,72 @@ let allParsedQnAContent = []; | ||
}; | ||
const findLuFilesInDir = async function (srcId, idsToFind) { | ||
const resolveLuContent = async function (srcId, idsToFind) { | ||
let luObjects = []; | ||
let parentFilePath = srcId === 'stdin' ? process.cwd() : path.parse(path.resolve(srcId)).dir; | ||
for (let idx = 0; idx < idsToFind.length; idx++) { | ||
// Support wild cards at the end of a relative .LU file path. | ||
// './bar/*' should look for all .lu files under the specified folder. | ||
// './bar/**' should recursively look for .lu files under sub-folders as well. | ||
let file = idsToFind[idx]; | ||
if (file.filePath.endsWith('*')) { | ||
const isRecursive = file.filePath.endsWith('**'); | ||
const rootFolder = file.filePath.replace(/\*/g, ''); | ||
let rootPath = rootFolder; | ||
if (!path.isAbsolute(rootFolder)) { | ||
rootPath = path.resolve(parentFilePath, rootFolder); | ||
} | ||
// Get LU files in this location | ||
const luFilesToAdd = helpers.findLUFiles(rootPath, isRecursive); | ||
// add these to filesToParse | ||
for (let f = 0; f < luFilesToAdd.length; f++) { | ||
const opts = new luOptions(luFilesToAdd[f], file.includeInCollate); | ||
luObjects.push(new luObject(readLuFile(luFilesToAdd[f]), opts)); | ||
} | ||
continue; | ||
let toResolve = idsToFind[idx]; | ||
if (isUrl(toResolve.filePath)) { | ||
await resolveLuUriContent(srcId, toResolve, luObjects); | ||
} | ||
if (!path.isAbsolute(file.filePath)) { | ||
file.filePath = path.resolve(parentFilePath, file.filePath); | ||
else { | ||
resolveLuFileContent(toResolve, luObjects, srcId); | ||
} | ||
// find matching parsed files and ensure includeInCollate is updated if needed. | ||
luObjects.push(new luObject(readLuFile(file.filePath), new luOptions(file.filePath, file.includeInCollate))); | ||
} | ||
return luObjects; | ||
}; | ||
/**
 * Resolve one local LU file reference and append the resulting LU object(s) to `luObjects`.
 *
 * Wild cards are supported at the end of a .LU file path:
 *   './bar/*'  looks for all .lu files under the specified folder.
 *   './bar/**' recursively looks for .lu files under sub-folders as well.
 * Any other reference is read as a single file.
 *
 * @param {{filePath: string, includeInCollate: *}} file - The reference to resolve. For a
 *     non-wildcard relative reference, `file.filePath` is rewritten in place to its absolute form.
 * @param {Array} luObjects - Output list; one luObject is pushed per file that is read.
 * @param {string} srcId - Id (path) of the content holding the reference, or 'stdin'.
 * @returns {void}
 */
const resolveLuFileContent = function (file, luObjects, srcId) {
    // Relative references are anchored at the referencing file's folder, or at the
    // current working directory when the referencing content came from stdin.
    const parentFilePath = srcId === 'stdin' ? process.cwd() : path.parse(path.resolve(srcId)).dir;
    // Every resolved file inherits the includeInCollate flag of the reference that pulled it in.
    const toLuObject = (luFilePath) => new luObject(readLuFile(luFilePath), new luOptions(luFilePath, file.includeInCollate));
    if (file.filePath.endsWith('*')) {
        const isRecursive = file.filePath.endsWith('**');
        // Drop the wildcard characters to obtain the folder to search.
        const rootFolder = file.filePath.replace(/\*/g, '');
        const rootPath = path.isAbsolute(rootFolder) ? rootFolder : path.resolve(parentFilePath, rootFolder);
        for (const luFilePath of helpers.findLUFiles(rootPath, isRecursive)) {
            luObjects.push(toLuObject(luFilePath));
        }
        return;
    }
    if (!path.isAbsolute(file.filePath)) {
        // Intentionally updates the caller's reference object with the absolute path.
        file.filePath = path.resolve(parentFilePath, file.filePath);
    }
    luObjects.push(toLuObject(file.filePath));
};
/**
 * Download the LU content behind a URI reference and append it to `luObjects`.
 *
 * The response body is passed through `helpers.fixBuffer` before being wrapped in a luObject
 * whose id is the URI. Nothing happens when the reference has no `filePath`.
 *
 * @param {string} srcId - Id of the content holding the reference (not used by this function).
 * @param {{filePath: string, includeInCollate: *}} toResolve - Reference whose `filePath` is the URI.
 * @param {Array} luObjects - Output list; the downloaded luObject is pushed onto it.
 * @returns {Promise<void>}
 * @throws {exception} `retCode.errorCode.INVALID_URI` when the request fails or the server
 *     answers with a non-success (non-2xx) status.
 */
const resolveLuUriContent = async function (srcId, toResolve, luObjects) {
    const uri = toResolve.filePath || undefined;
    if (uri === undefined) {
        return;
    }
    const invalidUri = () => {
        const errorMsg = `URI: "${uri}" appears to be invalid. Please double check the URI or re-try this parse when you are connected to the internet.`;
        const error = BuildDiagnostic({
            message: errorMsg,
            // NOTE(review): `luImport` is neither a parameter nor a local of this function.
            // The typeof guard keeps a missing binding from replacing the intended
            // INVALID_URI error with a ReferenceError - confirm where the range should come from.
            range: typeof luImport !== 'undefined' ? luImport.Range : undefined
        });
        return new exception(retCode.errorCode.INVALID_URI, error.toString(), [error]);
    };
    let response;
    try {
        response = await fetch(uri, { method: 'GET' });
    }
    catch (err) {
        // The request could not be made at all (bad URI, no connectivity, ...).
        throw invalidUri();
    }
    if (!response.ok) {
        // An error page (404, 500, ...) must not be parsed as if it were LU content.
        throw invalidUri();
    }
    const res = await response.buffer();
    luObjects.push(new luObject(helpers.fixBuffer(res), new luOptions(toResolve.filePath, toResolve.includeInCollate)));
};
/**
 * Tell whether a reference should be downloaded rather than read from disk.
 *
 * Parsing with `new URL()` alone is not enough: any "scheme:rest" string parses, so a
 * Windows absolute path such as "C:\bots\main.lu" is a syntactically valid URL with the
 * scheme "c:". Only http and https references are treated as URLs.
 *
 * @param {string} path - The reference text (note: shadows the `path` module inside this function).
 * @returns {boolean} true when `path` is an absolute http(s) URL, false otherwise.
 */
const isUrl = function (path) {
    let parsed;
    try {
        parsed = new URL(path);
    }
    catch (err) {
        // Not parseable as an absolute URL at all (relative path, bare file name, ...).
        return false;
    }
    return parsed.protocol === 'http:' || parsed.protocol === 'https:';
};
const updateParsedFiles = function (allParsedLUISContent, allParsedQnAContent, allParsedAlterationsContent, luobject) { | ||
@@ -433,0 +472,0 @@ // find the instance and ensure includeInCollate property is set correctly |
@@ -91,3 +91,8 @@ "use strict"; | ||
let text = utterance.text; | ||
let sortedEntitiesList = objectSortByStartPos(utterance.entities); | ||
// flatten entities | ||
let flatEntities = []; | ||
Object.assign([], utterance.entities).forEach(entity => flattenEntities(entity, flatEntities)); | ||
let sortedEntitiesList = objectSortByStartPos(flatEntities); | ||
// remove all children | ||
sortedEntitiesList.forEach(entity => delete entity.children); | ||
let tokenizedText = text.split(''); | ||
@@ -107,2 +112,8 @@ // handle cases where we have both child as well as cases where more than one entity can have the same start position | ||
}; | ||
/**
 * Append `entity` and all of its descendants to `flatEntities`, depth first.
 *
 * Descendants are appended before their parent. Each appended item is a shallow copy
 * (`Object.assign({}, entity)`), so the copy still references the same `children` array
 * as the source entity; removing properties from a copy leaves the source untouched.
 *
 * @param {Object} entity - Entity that may carry a `children` array of nested entities.
 * @param {Array<Object>} flatEntities - Output list that receives the copies.
 * @returns {void}
 */
const flattenEntities = function (entity, flatEntities) {
    // Array.isArray already rejects undefined, and forEach is a no-op on an empty list.
    if (Array.isArray(entity.children)) {
        entity.children.forEach(child => flattenEntities(child, flatEntities));
    }
    flatEntities.push(Object.assign({}, entity));
};
const getEntitiesByPositionList = function (entitiesList, tokenizedText) { | ||
@@ -109,0 +120,0 @@ (entitiesList || []).forEach(entity => { |
@@ -80,5 +80,2 @@ "use strict"; | ||
throw (new exception(retCode.errorCode.INVALID_LU_FILE_REF, `[ERROR]: Invalid LU File Ref: "${utterance}"`)); | ||
let parseUrl = url.parse(linkValue); | ||
if (parseUrl.host || parseUrl.hostname) | ||
throw (new exception(retCode.errorCode.INVALID_LU_FILE_REF, `[ERROR]: Invalid LU File Ref: "${utterance}". \n Reference cannot be a URI`)); | ||
// reference can either be #<Intent-Name> or #? or /*#? or /**#? or #*utterance* or #<Intent-Name>*patterns* | ||
@@ -182,2 +179,36 @@ let splitRegExp = new RegExp(/^(?<fileName>.*?)(?<segment>#|\*+)(?<path>.*?)$/gim); | ||
}); | ||
}, | ||
fixBuffer: function (fileBuffer) { | ||
if (fileBuffer) { | ||
// If the data starts with BOM, we know it is UTF | ||
if (fileBuffer[0] === 0xEF && fileBuffer[1] === 0xBB && fileBuffer[2] === 0xBF) { | ||
// EF BB BF UTF-8 with BOM | ||
fileBuffer = fileBuffer.slice(3); | ||
} | ||
else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) { | ||
// FF FE 00 00 UTF-32, little-endian BOM | ||
fileBuffer = fileBuffer.slice(4); | ||
} | ||
else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFE && fileBuffer[3] === 0xFF) { | ||
// 00 00 FE FF UTF-32, big-endian BOM | ||
fileBuffer = fileBuffer.slice(4); | ||
} | ||
else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) { | ||
// FE FF 00 00 UCS-4, unusual octet order BOM (3412) | ||
fileBuffer = fileBuffer.slice(4); | ||
} | ||
else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFF && fileBuffer[3] === 0xFE) { | ||
// 00 00 FF FE UCS-4, unusual octet order BOM (2143) | ||
fileBuffer = fileBuffer.slice(4); | ||
} | ||
else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE) { | ||
// FF FE UTF-16, little endian BOM | ||
fileBuffer = fileBuffer.slice(2); | ||
} | ||
else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF) { | ||
// FE FF UTF-16, big endian BOM | ||
fileBuffer = fileBuffer.slice(2); | ||
} | ||
} | ||
return fileBuffer.toString('utf8').replace(/\0/g, ''); | ||
} | ||
@@ -184,0 +215,0 @@ }; |
@@ -10,2 +10,3 @@ "use strict"; | ||
const retCode = require('./../parser/utils/enums/CLI-errors'); | ||
const helpers = require('./../parser/utils/helpers'); | ||
async function readTextFile(file) { | ||
@@ -18,34 +19,3 @@ return new Promise(async (resolve, reject) => { | ||
let fileBuffer = await fs.readFile(file); | ||
if (fileBuffer) { | ||
// If the data starts with BOM, we know it is UTF | ||
if (fileBuffer[0] === 0xEF && fileBuffer[1] === 0xBB && fileBuffer[2] === 0xBF) { | ||
// EF BB BF UTF-8 with BOM | ||
fileBuffer = fileBuffer.slice(3); | ||
} | ||
else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) { | ||
// FF FE 00 00 UTF-32, little-endian BOM | ||
fileBuffer = fileBuffer.slice(4); | ||
} | ||
else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFE && fileBuffer[3] === 0xFF) { | ||
// 00 00 FE FF UTF-32, big-endian BOM | ||
fileBuffer = fileBuffer.slice(4); | ||
} | ||
else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) { | ||
// FE FF 00 00 UCS-4, unusual octet order BOM (3412) | ||
fileBuffer = fileBuffer.slice(4); | ||
} | ||
else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFF && fileBuffer[3] === 0xFE) { | ||
// 00 00 FF FE UCS-4, unusual octet order BOM (2143) | ||
fileBuffer = fileBuffer.slice(4); | ||
} | ||
else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE) { | ||
// FF FE UTF-16, little endian BOM | ||
fileBuffer = fileBuffer.slice(2); | ||
} | ||
else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF) { | ||
// FE FF UTF-16, big endian BOM | ||
fileBuffer = fileBuffer.slice(2); | ||
} | ||
} | ||
return resolve(fileBuffer.toString('utf8').replace(/\0/g, '')); | ||
return resolve(helpers.fixBuffer(fileBuffer)); | ||
} | ||
@@ -52,0 +22,0 @@ catch (err) { |
{ | ||
"name": "@microsoft/bf-lu", | ||
"version": "4.10.0-dev.20200804.10e144c", | ||
"version": "4.10.0-dev.20200806.789391b", | ||
"author": "Microsoft", | ||
@@ -5,0 +5,0 @@ "bugs": "https://github.com/microsoft/botframework-cli/issues", |
Sorry, the diff of this file is too big to display
837690
18652
7