Comparing version 0.5.1 to 0.7.0
237
bin/cli.js
#!/usr/bin/env node | ||
/* jshint maxlen: 500 */ | ||
var fs = require('fs'), | ||
async = require('async'), | ||
chalk = require('chalk'), | ||
isutf8 = require('isutf8'), | ||
program = require('commander'), | ||
Q = require('q'), | ||
yaspeller = require('../lib/yaspeller'), | ||
printDebug = require('../lib/debug'), | ||
FILENAME_DICTIONARY = '.yaspeller.dictionary.json', | ||
dictionary; | ||
mDebug = require('../lib/debug'), | ||
printDebug = mDebug.print, | ||
startTime = Date.now(), | ||
dictionary = [], | ||
settings = {}, | ||
jsonAtDir = {}, | ||
json = require('../.yaspellerrc.default.json'), | ||
defaultFileExtensions = json.fileExtensions.join(','), | ||
jsonAtDirFilename = './.yaspellerrc'; | ||
/**
 * Reads a user dictionary from a JSON file.
 *
 * Terminates the process with exit code 1 when the file is not UTF-8 or
 * when reading/parsing it throws.
 *
 * @param {string} filename Path to the dictionary file
 * @returns {*} Parsed JSON content; an empty array when the file does not
 *     exist or the parsed value is falsy
 */
function getDictionary(filename) {
    var contents;

    if(program.debug) {
        printDebug('get/check dictionary: ' + filename);
    }

    // A missing dictionary is not an error: fall back to an empty list.
    if(!fs.existsSync(filename)) {
        return [];
    }

    try {
        contents = fs.readFileSync(filename);

        if(!isutf8(contents)) {
            console.error(filename + ': is not utf-8');
            process.exit(1);
        }

        contents = JSON.parse(contents.toString('utf-8'));

        if(program.debug) {
            printDebug('use dictionary: ' + filename);
        }
    } catch(e) {
        console.error(filename + ': error parsing JSON');
        process.exit(1);
    }

    return contents || [];
}
function getTypos(data) { | ||
@@ -40,4 +23,4 @@ var buf = []; | ||
var find = false; | ||
// ERROR_UNKNOWN_WORD: Слова нет в словаре | ||
if(el.code === 1) { | ||
if(el.code === 1) { // ERROR_UNKNOWN_WORD | ||
dictionary.some(function(el2) { | ||
@@ -97,4 +80,3 @@ if(el2 === el.word) { | ||
data.forEach(function(el) { | ||
// ERROR_REPEAT_WORD: Повтор слова | ||
if(el.code === 2) { | ||
if(el.code === 2) { // ERROR_REPEAT_WORD | ||
words.push(el.word); | ||
@@ -110,4 +92,3 @@ } | ||
data.forEach(function(el) { | ||
// ERROR_CAPITALIZATION: Неверное употребление прописных и строчных букв | ||
if(el.code === 3) { | ||
if(el.code === 3) { // ERROR_CAPITALIZATION | ||
words.push(el.word); | ||
@@ -123,4 +104,3 @@ } | ||
data.some(function(el) { | ||
// ERROR_TOO_MANY_ERRORS: Текст содержит слишком много ошибок | ||
if(el.code === 4) { | ||
if(el.code === 4) { // ERROR_TOO_MANY_ERRORS | ||
hasErrors = true; | ||
@@ -137,3 +117,8 @@ return true; | ||
var SEPARATOR = '\n-----'; | ||
return chalk.cyan(title + ': ' + words.length + SEPARATOR + '\n') + words.join('\n') + chalk.cyan(SEPARATOR); | ||
words.forEach(function(val, i) { | ||
words[i] = (i + 1) + '. ' + words[i]; | ||
}); | ||
return chalk.red(title + ': ' + words.length + SEPARATOR + '\n') + words.join('\n') + chalk.red(SEPARATOR) + '\n'; | ||
} | ||
@@ -166,7 +151,8 @@ | ||
var time = data.time ? ' ' + chalk.magenta(data.time + ' ms') : ''; | ||
if(textErrors.length) { | ||
console.error(chalk.red.bold('[ERR]'), data.resource); | ||
console.error(chalk.red('[ERR]') + ' ' + data.resource + time); | ||
console.error(textErrors.join('\n') + '\n'); | ||
} else { | ||
console.log(chalk.bold.green('[OK]'), data.resource); | ||
program.onlyErrors || console.log(chalk.green('[OK]') + ' ' + data.resource + time); | ||
} | ||
@@ -176,74 +162,126 @@ } | ||
/**
 * Splits a comma-separated option value into a list of items.
 *
 * Whitespace around each item is removed and empty items are dropped, so
 * "md, html" and "md,html," both produce ['md', 'html'].
 *
 * @param {string} val Raw value of the command line option
 * @returns {string[]} Non-empty, trimmed items
 */
function splitOnCommas(val) {
    return val.split(',')
        .map(function(item) {
            return item.trim();
        })
        .filter(function(item) {
            return item.length > 0;
        });
}
// Boolean speller switches as [camelCase key, help text] pairs.
// The key is the name used in settings.options; the help text is shown by --help.
var options = [
    ['ignoreUppercase', 'ignore words written in capital letters'],
    ['ignoreDigits', 'ignore words with numbers, such as "avp17h4534"'],
    ['ignoreUrls', 'ignore Internet addresses, email addresses and filenames'],
    ['findRepeatWords', 'highlight consecutive repetitions of words. For example, "I flew to to to Cyprus"'],
    ['ignoreLatin', 'ignore words written in Latin, for example, "madrid"'],
    ['flagLatin', 'mark words written in Latin as erroneous'],
    ['byWords', 'do not use the surrounding words (context) during the check. This is useful when a list of individual words is passed to the service'],
    ['ignoreCapitalization', 'ignore the incorrect use of UPPERCASE / lowercase letters, for example, in the word "moscow"'],
    ['ignoreRomanNumerals', 'ignore Roman numerals ("I, II, III, ...")']
];
program | ||
.version(require('../package.json').version) | ||
.usage('[options] <file-or-directory-or-link...>') | ||
.option('-d, --debug', 'Debug mode') | ||
.option('-di, --dictionary <s>', 'json file for own dictionary') | ||
.option('-f, --format <s>', 'Formats: plain or html. Default: plain') | ||
.option('-l, --lang <s>', 'Langs: ru, en, tr. Default: "en,ru"') | ||
.option('-n, --no-colors', 'Clean output without colors') | ||
.parse(process.argv); | ||
.option('-l, --lang <value>', 'languages: en, kk, ru or uk. Default: "en,ru"') | ||
.option('-f, --format <value>', 'formats: plain, html or auto. Default: auto') | ||
.option('--file-extensions <value>', 'set file extensions to search for files in a folder. Default: "' + defaultFileExtensions + '"', splitOnCommas, null) | ||
.option('--dictionary <file>', 'json file for own dictionary') | ||
.option('--no-colors', 'clean output without colors') | ||
.option('--max-requests <value>', 'max count of requests at a time. Default: 2', parseInt, 0) | ||
.option('--report', 'generate html report - ./yaspeller.html') | ||
.option('--only-errors', 'output only errors') | ||
.option('--debug', 'debug mode'); | ||
var startTime = Date.now(), | ||
settings = {}, | ||
jsonAtDirFilename = './.yaspeller.json', | ||
jsonAtDir = {}, | ||
json = JSON.parse(fs.readFileSync(__dirname + '/../.yaspeller.default.json', 'utf-8')); | ||
options.forEach(function(el) { | ||
program.option('--' + el[0].replace(/([A-Z])/g, '-$1').toLowerCase(), el[1]); | ||
}); | ||
program.debug && printDebug('get/check ./yaspeller.json'); | ||
if(fs.existsSync(jsonAtDirFilename)) { | ||
try { | ||
jsonAtDir = JSON.parse(fs.readFileSync(jsonAtDirFilename)); | ||
program.debug && printDebug('using ./.yaspeller.json'); | ||
} catch(e) { | ||
console.error('error parsing ./.yaspeller.json'); | ||
process.exit(1); | ||
} | ||
} | ||
program.parse(process.argv); | ||
Object.keys(jsonAtDir).forEach(function(key) { | ||
json[key] = jsonAtDir[key]; | ||
}); | ||
if(!program.args.length) { | ||
program.help(); | ||
} | ||
yaspeller.setHtmlExts(json.html); | ||
yaspeller.setFileExtensions(json.fileExtensions); | ||
yaspeller.setExcludeFiles(json.excludeFiles); | ||
printDebug('get/check ./yaspellerrc'); | ||
if(fs.existsSync(jsonAtDirFilename)) { | ||
try { | ||
jsonAtDir = JSON.parse(fs.readFileSync(jsonAtDirFilename)); | ||
printDebug('Using ' + jsonAtDirFilename); | ||
} catch(e) { | ||
console.error(chalk.red('Error parsing ' + jsonAtDirFilename)); | ||
process.exit(2); | ||
} | ||
} | ||
settings.lang = program.lang || json.lang; | ||
settings.format = program.format || json.format; | ||
Object.keys(jsonAtDir).forEach(function(key) { | ||
json[key] = jsonAtDir[key]; | ||
}); | ||
chalk.enabled = program.colors; | ||
if(!program.args.length) { | ||
program.help(); | ||
mDebug.setDebug(program.debug); | ||
yaspeller.setParams({ | ||
maxRequests: program.maxRequests || json.maxRequests || 2, | ||
fileExtensions: program.fileExtensions || json.fileExtensions, | ||
excludeFiles: json.excludeFiles | ||
}); | ||
settings.lang = program.lang || json.lang; | ||
settings.format = program.format || json.format; | ||
settings.options = json.options || {}; | ||
options.forEach(function(el) { | ||
var key = el[0]; | ||
if(program[key]) { | ||
settings.options[key] = true; | ||
} else if(typeof json[key] !== 'undefined') { | ||
settings.options[key] = json[key]; | ||
} | ||
}); | ||
if(program.debug) { | ||
Object.keys(settings.options).forEach(function(key) { | ||
printDebug('option "' + key + '" is true'); | ||
}); | ||
} | ||
var hasErrors = false, | ||
onNext = function(data) { | ||
data.forEach(function(el) { | ||
if(el[0]) { | ||
hasErrors = true; | ||
// Use the "dictionary" property of the config when it is present.
// The assignment must be guarded by presence: assigning an absent value
// would replace the initial empty array with undefined, and the typo
// filtering later calls dictionary.some().
if(json.dictionary) {
    dictionary = json.dictionary;
}

// A dictionary file given with --dictionary replaces the configured one.
if(program.dictionary) {
    if(fs.existsSync(program.dictionary)) {
        printDebug('get/check dictionary: ' + program.dictionary);
        try {
            var bufDict = fs.readFileSync(program.dictionary);
            if(!isutf8(bufDict)) {
                console.error(program.dictionary + ': is not utf-8');
                process.exit(2);
            }

            dictionary = JSON.parse(bufDict.toString('utf-8'));
            printDebug('use dictionary: ' + program.dictionary);
        } catch(e) {
            // Any failure while reading or parsing ends up here.
            console.error(program.dictionary + ': error parsing JSON');
            process.exit(2);
        }
    } else {
        console.error(program.dictionary + ': is not exists');
        process.exit(2);
    }
}
buildResource(el[0], el[1]); | ||
}); | ||
var hasErrors = false, // becomes true once any resource fails; selects the exit code
    tasks = [],
    /**
     * Handles the result of checking a single resource.
     *
     * @param {boolean} err True when the resource could not be checked
     * @param {Object|Error} data Check result, or an Error when err is true
     */
    onResource = function(err, data) {
        // Only a failed check marks the run as failed
        // (the previous "err || ..." form did the opposite).
        if(err) {
            hasErrors = true;
        }

        buildResource(err, data);
    };
yaspeller.setDebug(program.debug); | ||
dictionary = getDictionary(program.dictionary || FILENAME_DICTIONARY); | ||
var queries = []; | ||
program.args.forEach(function(resource) { | ||
queries.push(Q.Promise(function(resolve) { | ||
tasks.push(function(cb) { | ||
if(resource.search(/^https?:/) > -1) { | ||
if(resource.search(/sitemap\.xml$/) > -1) { | ||
yaspeller.checkSitemap(resource, function(err, data) { | ||
onNext(err, data); | ||
resolve(); | ||
}, settings); | ||
yaspeller.checkSitemap(resource, function() { | ||
cb(); | ||
}, settings, onResource); | ||
} else { | ||
yaspeller.checkUrl(resource, function(err, data) { | ||
onNext([[err, data]]); | ||
resolve(); | ||
onResource(err, data); | ||
cb(); | ||
}, settings); | ||
@@ -254,23 +292,22 @@ } | ||
if(fs.statSync(resource).isDirectory()) { | ||
yaspeller.checkDir(resource, function(err, data) { | ||
onNext(err, data); | ||
resolve(); | ||
}, settings); | ||
yaspeller.checkDir(resource, function() { | ||
cb(); | ||
}, settings, onResource); | ||
} else { | ||
yaspeller.checkFile(resource, function(err, data) { | ||
onNext([[err, data]]); | ||
resolve(); | ||
onResource(err, data); | ||
cb(); | ||
}, settings); | ||
} | ||
} else { | ||
onNext([[true, Error(resource + ': is not exists')]]); | ||
resolve(); | ||
onResource(true, Error(resource + ': is not exists')); | ||
cb(); | ||
} | ||
} | ||
})); | ||
}); | ||
}); | ||
Q.all(queries).then(function() { | ||
console.log(chalk.magenta('Build finished: ' + ((+new Date() - startTime) / 1000) + ' sec.')); | ||
async.series(tasks, function() { | ||
program.onlyErrors || console.log(chalk.magenta('Checking finished: ' + ((+new Date() - startTime) / 1000) + ' sec.')); | ||
process.exit(hasErrors ? 1 : 0); | ||
}); |
@@ -1,5 +0,20 @@ | ||
var chalk = require('chalk'); | ||
var chalk = require('chalk'), | ||
isDebug = false; | ||
module.exports = function(text) { | ||
console.log(chalk.cyan('[DEBUG]') + ' ' + text); | ||
module.exports = { | ||
setDebug: function(val) { | ||
isDebug = val; | ||
}, | ||
print: function(text) { | ||
if(isDebug) { | ||
if(typeof text === 'object') { | ||
console.log(chalk.cyan('[DEBUG]')); | ||
Object.keys(text).forEach(function(key) { | ||
console.log(chalk.cyan(' ' + key + ': ') + text[key]); | ||
}); | ||
} else { | ||
console.log(chalk.cyan('[DEBUG]') + ' ' + text); | ||
} | ||
} | ||
} | ||
}; |
/* jshint maxlen: 300 */ | ||
var request = require('request'), | ||
pth = require('path'), | ||
async = require('async'), | ||
fs = require('fs'), | ||
isutf8 = require('isutf8'), | ||
Q = require('q'), | ||
printDebug = require('../lib/debug'), | ||
minimatch = require('minimatch'), | ||
pth = require('path'), | ||
xml2js = require('xml2js'), | ||
isDebug = false, | ||
fileExtensions = [], | ||
htmlExts = [], | ||
excludeFiles = [], | ||
YASPELLER_API_URL = 'http://speller.yandex.net/services/spellservice.json/checkText'; | ||
printDebug = require('../lib/debug').print, | ||
YASPELLER_API_URL = 'http://speller.yandex.net/services/spellservice.json/checkText', | ||
params = { | ||
excludeFiles: [], | ||
fileExtensions: [], | ||
maxRequests: 2 | ||
}; | ||
function getExtension(file) { | ||
var buf = file.split('.'); | ||
return buf[buf.length - 1]; | ||
function isHTML(text) { | ||
return text.search(/<[a-z!]/i) !== -1; | ||
} | ||
/**
 * Tells whether the given path is a directory.
 *
 * @param {string} dir Path to examine
 * @returns {boolean}
 */
function isDir(dir) {
    var stats = fs.statSync(dir);

    return stats.isDirectory();
}
function getFormat(format, ext) { | ||
if(format === 'html' || format === 'plain') { | ||
function getFormat(text, format) { | ||
if(format === 'html' || format === 'plain'){ | ||
return format; | ||
} | ||
return htmlExts.indexOf(ext) > -1 ? 'html' : 'plain'; | ||
if(format === 'auto' || !format) { | ||
return isHTML(text) ? 'html' : 'plain'; | ||
} | ||
return 'plain'; | ||
} | ||
@@ -41,3 +41,3 @@ | ||
IGNORE_LATIN: 16, | ||
NO_SUGGEST: 32, | ||
//NO_SUGGEST: 32, | ||
FLAG_LATIN: 128, | ||
@@ -50,4 +50,5 @@ BY_WORDS: 256, | ||
Object.keys(options || {}).forEach(function(key) { | ||
if(standartOptions[key] && options[key]) { | ||
result |= standartOptions[key]; | ||
var upperCaseKey = key.replace(/([A-Z])/g, '_$1').toUpperCase(); | ||
if(standartOptions[upperCaseKey] && options[key]) { | ||
result |= standartOptions[upperCaseKey]; | ||
} | ||
@@ -60,9 +61,7 @@ }); | ||
function prepareText(text, format) { | ||
var buf = text.trim(); | ||
if(format === 'html') { | ||
buf = buf.replace(/<\/?[^>]+>/g, ' '); // strip html tags | ||
text = text.replace(/<\/?[^>]+>/g, ' '); // strip html tags | ||
} | ||
return buf.replace(/\r\n/g, '\n') // fix Windows | ||
return text.replace(/\r\n/g, '\n') // fix Windows | ||
.replace(/\r/g, '\n') // fix MacOS | ||
@@ -77,3 +76,6 @@ .replace(/\s+\n/g, '\n') // trailling spaces | ||
var res = [], | ||
regExp = new RegExp('\\.(' + fileExtensions.join('|') + ')$', 'i'), | ||
regExp = new RegExp('\\.(' + params.fileExtensions.join('|') + ')$', 'i'), | ||
isDir = function(dir) { | ||
return fs.statSync(dir).isDirectory(); | ||
}, | ||
find = function(path) { | ||
@@ -83,4 +85,4 @@ var files = fs.readdirSync(path); | ||
var file = pth.join(path, el); | ||
for(var i = 0; i < excludeFiles.length; i++) { | ||
if(minimatch(file, excludeFiles[i])) { | ||
for(var i = 0; i < params.excludeFiles.length; i++) { | ||
if(minimatch(file, params.excludeFiles[i])) { | ||
return; | ||
@@ -113,16 +115,16 @@ } | ||
* @tutorial settings | ||
* @param {Object} [settings] Настройки | ||
* @param {string} [settings.format] Формат текста: plain или html | ||
* @param {string|Array} [settings.lang] Языки проверки: ru – русский, uk – украинский, en – английский | ||
* @param {Object} [settings.options] Опции | ||
* @param {boolean} [settings.options.IGNORE_UPPERCASE] Пропускать слова, написанные заглавными буквами, например, "ВПК" | ||
* @param {boolean} [settings.options.IGNORE_DIGITS] Пропускать слова с цифрами, например, "авп17х4534" | ||
* @param {boolean} [settings.options.IGNORE_URLS] Пропускать интернет-адреса, почтовые адреса и имена файлов | ||
* @param {boolean} [settings.options.FIND_REPEAT_WORDS] Подсвечивать повторы слов, идущие подряд. Например, "я полетел на на Кипр" | ||
* @param {boolean} [settings.options.IGNORE_LATIN] Пропускать слова, написанные латиницей, например, "madrid" | ||
* @param {boolean} [settings.options.NO_SUGGEST] Только проверять текст, не выдавая вариантов для замены | ||
* @param {boolean} [settings.options.FLAG_LATIN] Отмечать слова, написанные латиницей, как ошибочные | ||
* @param {boolean} [settings.options.BY_WORDS] Не использовать словарное окружение (контекст) при проверке. Опция полезна в случаях, когда на вход сервиса передается список отдельных слов | ||
* @param {boolean} [settings.options.IGNORE_CAPITALIZATION] Игнорировать неверное употребление ПРОПИСНЫХ/строчных букв, например, в слове "москва" | ||
* @param {boolean} [settings.options.IGNORE_ROMAN_NUMERALS] Игнорировать римские цифры ("I, II, III, ...") | ||
* @param {Object} [settings] | ||
* @param {string} [settings.format] Text format: plain or html | ||
* @param {string|Array} [settings.lang] Language: en, kk, ru or uk | ||
* @param {Object} [settings.options] | ||
* @param {boolean} [settings.options.ignoreUppercase] Ignore words written in capital letters | ||
* @param {boolean} [settings.options.ignoreDigits] Ignore words with numbers, such as "avp17h4534" | ||
* @param {boolean} [settings.options.ignoreUrls] Ignore Internet addresses, email addresses and filenames | ||
* @param {boolean} [settings.options.findRepeatWords] Highlight repetitions of words, consecutive. For example, "I flew to to to Cyprus" | ||
* @param {boolean} [settings.options.ignoreLatin] Ignore words, written in Latin, for example, "madrid" | ||
* @param {boolean} [settings.options.noSuggest] Just check the text, without giving options to replace | ||
* @param {boolean} [settings.options.flagLatin] Mark words written in Latin as erroneous | ||
* @param {boolean} [settings.options.byWords] Do not use a dictionary environment (context) during the scan. This is useful in cases where the service is transmitted to the input of a list of individual words | ||
* @param {boolean} [settings.options.ignoreCapitalization] Ignore the incorrect use of UPPERCASE / lowercase letters, for example, in the word "moscow" | ||
* @param {boolean} [settings.options.ignoreRomanNumerals] Ignore Roman numerals ("I, II, III, ...") | ||
*/ | ||
@@ -132,36 +134,102 @@ function checkText(text, callback, settings) { | ||
var bufText, | ||
format = settings.format, | ||
options = settings.options, | ||
lang = settings.lang; | ||
var format = getFormat(text, settings.format || 'plain'), | ||
options = getOptions(settings.options), | ||
lang = settings.lang || 'en,ru'; | ||
if(Array.isArray(text)) { | ||
bufText = []; | ||
text.forEach(function(el) { | ||
bufText.push(prepareText(el, format)); | ||
text = prepareText(text, format); | ||
if(Array.isArray(lang)) { | ||
lang = lang.join(','); | ||
} | ||
var tasks = [], | ||
texts = splitText(text); | ||
texts.forEach(function(el, i) { | ||
printDebug({ | ||
request: i, | ||
format: format, | ||
lang: lang, | ||
options: options, | ||
text: el.length > 64 ? el.substr(0, 64) + '...' : el | ||
}); | ||
bufText = bufText.join('\n'); | ||
} else { | ||
bufText = prepareText(text, format); | ||
tasks.push(function(cb) { | ||
request.post(YASPELLER_API_URL, { | ||
form: { | ||
format: format, | ||
lang: lang, | ||
options: options, | ||
text: el | ||
} | ||
}, function(error, response, body) { | ||
if(!error && response && response.statusCode === 200) { | ||
cb(false, [false, JSON.parse(body)]); | ||
} else { | ||
cb(false, [true, Error('Yandex.Speller API returns status code is ' + (response && response.statusCode))]); | ||
} | ||
}); | ||
}); | ||
}); | ||
async.parallelLimit(tasks, params.maxRequests, function(err, data) { | ||
var buf = mergeResults(data); | ||
callback(buf.err, buf.data); | ||
}); | ||
} | ||
function splitText(text) { | ||
var MAX_LEN = 10000, // max length of text for Yandex.Speller API | ||
texts = [], | ||
pos = 0, | ||
newPos = 0; | ||
while(pos < text.length) { | ||
if(pos + MAX_LEN >= text.length) { | ||
texts.push(text.substring(pos)); | ||
break; | ||
} else { | ||
newPos = getPosition(text, pos + MAX_LEN); | ||
texts.push(text.substring(pos, newPos)); | ||
pos = newPos; | ||
} | ||
} | ||
if(Array.isArray(lang)) { | ||
lang = lang.join(','); | ||
return texts; | ||
} | ||
function getPosition(text, start) { | ||
for(var i = start - 1; i >= start - 500; i--) { | ||
var sym = text[i]; | ||
if(sym === ' ' || sym === '\n' || sym === '\t') { | ||
return i; | ||
} | ||
} | ||
request.post(YASPELLER_API_URL, { | ||
form: { | ||
format: format || 'plain', | ||
lang: lang || 'ru,en', | ||
options: getOptions(options) || 0, | ||
text: bufText | ||
return start; | ||
} | ||
function mergeResults(res) { | ||
var err = false, data = []; | ||
res.some(function(el) { | ||
if(el[0]) { | ||
err = true; | ||
data = el[1]; | ||
return true; | ||
} | ||
}, function(error, response, body) { | ||
if(!error && response.statusCode === 200) { | ||
callback(false, JSON.parse(body)); | ||
} else { | ||
callback(true, Error('Yandex.Speller API returns status code is ' + response.statusCode)); | ||
} | ||
return false; | ||
}); | ||
if(!err) { | ||
res.forEach(function(el) { | ||
data = data.concat(el[1]); | ||
}); | ||
} | ||
return { | ||
err: err, | ||
data: data | ||
}; | ||
} | ||
@@ -179,3 +247,3 @@ | ||
isDebug && printDebug('get: ' + file); | ||
printDebug('get: ' + file); | ||
@@ -186,8 +254,7 @@ if(fs.existsSync(file)) { | ||
if(isutf8(buf)) { | ||
settings.format = getFormat(settings.format, getExtension(file, settings.format)); | ||
printDebug('post text -> Yandex.Speller API: ' + file); | ||
isDebug && printDebug('post text -> api: ' + file); | ||
var startTime = Date.now(); | ||
checkText(buf.toString(), function(err, data) { | ||
callback(err, err ? data : {resource: file, data: data}); | ||
callback(err, err ? data : {resource: file, data: data, time: Date.now() - startTime}); | ||
}, settings); | ||
@@ -213,5 +280,7 @@ } else { | ||
function checkUrl(url, callback, settings) { | ||
isDebug && printDebug('get: ' + url); | ||
settings = settings || {}; | ||
request.get(url, function(error, response, text) { | ||
printDebug('get: ' + url); | ||
request.get(url, function(error, response, text) { | ||
if(error || response.statusCode !== 200) { | ||
@@ -222,4 +291,5 @@ callback(true, Error(url + ': returns status code is ' + response.statusCode)); | ||
var startTime = Date.now(); | ||
checkText(text, function(err, data) { | ||
callback(err, err ? data : {resource: url, data: data}); | ||
callback(err, err ? data : {resource: url, data: data, time: Date.now() - startTime}); | ||
}, settings); | ||
@@ -233,8 +303,13 @@ }); | ||
* @param {string} dir | ||
* @param {Function} callback | ||
* @param {Function} commonCallback Common callback | ||
* @param {Object} [settings] See {@tutorial settings} | ||
* @param {Function} [callback] Callback on each file | ||
*/ | ||
function checkDir(dir, callback, settings) { | ||
function checkDir(dir, commonCallback, settings, callback) { | ||
settings = settings || {}; | ||
if(!fs.existsSync(dir)) { | ||
callback([[true, Error(dir + ': file or directory is not exists')]]); | ||
var obj = [true, Error(dir + ': file or directory is not exists')]; | ||
callback && callback.apply(this, obj); | ||
commonCallback([obj]); | ||
return; | ||
@@ -244,16 +319,15 @@ } | ||
var files = findFiles(dir), | ||
queries = []; | ||
tasks = []; | ||
files.forEach(function(file) { | ||
queries.push(Q.Promise(function(resolve) { | ||
settings = settings || {}; | ||
settings.format = getFormat(settings.format, getExtension(file)); | ||
tasks.push(function(cb) { | ||
checkFile(file, function(err, data) { | ||
resolve([err, data]); | ||
callback && callback(err, data); | ||
cb(false, [err, data]); | ||
}, settings); | ||
})); | ||
}); | ||
}); | ||
Q.all(queries).done(function(data) { | ||
callback(data); | ||
async.parallelLimit(tasks, params.maxRequests, function(err, data) { | ||
commonCallback(data); | ||
}); | ||
@@ -266,15 +340,19 @@ } | ||
* @param {string} url | ||
* @param {Function} callback | ||
* @param {Function} commonCallback Common callback | ||
* @param {Object} [settings] See {@tutorial settings} | ||
* @param {Function} [callback] Callback on each url | ||
*/ | ||
function checkSitemap(url, callback, settings) { | ||
var queries = [], | ||
results = []; | ||
function checkSitemap(url, commonCallback, settings, callback) { | ||
settings = settings || {}; | ||
isDebug && printDebug('get: ' + url); | ||
var results = []; | ||
printDebug('get: ' + url); | ||
request.get(url, function(error, response, xml) { | ||
if(error || response.statusCode !== 200) { | ||
results.push([true, Error(url + ': returns status code is ' + response.statusCode)]); | ||
callback(results); | ||
var obj = [true, Error(url + ': returns status code is ' + response.statusCode)]; | ||
results.push(obj); | ||
callback && callback.apply(this, obj); | ||
commonCallback(results); | ||
@@ -287,15 +365,19 @@ return; | ||
if(err) { | ||
results.push([true, Error(url + ': error parsing xml')]); | ||
callback(results); | ||
var obj = [true, Error(url + ': error parsing xml')]; | ||
results.push(obj); | ||
callback && callback.apply(this, obj); | ||
commonCallback(results); | ||
return; | ||
} | ||
var tasks = []; | ||
if(result && result.urlset && Array.isArray(result.urlset.url)) { | ||
result.urlset.url.forEach(function(el) { | ||
el.loc.forEach(function(url) { | ||
queries.push(Q.Promise(function(resolve) { | ||
el.loc && el.loc.forEach(function(url) { | ||
tasks.push(function(cb) { | ||
checkUrl(url, function(err, data) { | ||
resolve([err, data]); | ||
callback && callback(err, data); | ||
cb(false, [err, data]); | ||
}, settings); | ||
})); | ||
}); | ||
}); | ||
@@ -305,4 +387,4 @@ }); | ||
Q.all(queries).done(function(data) { | ||
callback(data); | ||
async.parallelLimit(tasks, params.maxRequests, function(err, data) { | ||
commonCallback(data); | ||
}); | ||
@@ -314,14 +396,7 @@ }); | ||
module.exports = { | ||
setDebug: function(val) { | ||
isDebug = val; | ||
setParams: function(p) { | ||
Object.keys(p).forEach(function(key) { | ||
params[key] = p[key]; | ||
}); | ||
}, | ||
setExcludeFiles: function(files) { | ||
excludeFiles = files; | ||
}, | ||
setFileExtensions: function(exts) { | ||
fileExtensions = exts; | ||
}, | ||
setHtmlExts: function(exts) { | ||
htmlExts = exts; | ||
}, | ||
checkText: checkText, | ||
@@ -328,0 +403,0 @@ checkFile: checkFile, |
@@ -9,4 +9,4 @@ { | ||
"main": "index.js", | ||
"description": "Проверка опечаток в текстах, в файлах и на сайтах", | ||
"version": "0.5.1", | ||
"description": "Search tool typos in the text, files and websites", | ||
"version": "0.7.0", | ||
"license": "MIT", | ||
@@ -25,3 +25,4 @@ "homepage": "https://github.com/hcodes/yaspeller", | ||
"dependencies": { | ||
"commander": "~2.5", | ||
"async": "0.9.0", | ||
"commander": "~2.6", | ||
"request": "~2.x", | ||
@@ -31,10 +32,9 @@ "isutf8": "~1.0", | ||
"chalk": "0.5.1", | ||
"q": "~1.1.2", | ||
"minimatch": "~2.0.1" | ||
}, | ||
"devDependencies": { | ||
"mocha": "2.0.x", | ||
"mocha": "2.1.x", | ||
"chai": "1.x", | ||
"istanbul": "0.3.x", | ||
"jscs": "1.8.x", | ||
"jscs": "1.9.x", | ||
"jshint": "2.x" | ||
@@ -47,3 +47,3 @@ }, | ||
"scripts": { | ||
"test": "npm run-script jshint && npm run-script check-style && npm run-script unit-test-coverage", | ||
"test": "npm run-script jshint && npm run-script jscs && npm run-script unit-test-coverage", | ||
"jscs": "./node_modules/.bin/jscs .", | ||
@@ -50,0 +50,0 @@ "jshint": "./node_modules/.bin/jshint .", |
149
README.md
yaspeller | ||
========= | ||
[![NPM version](https://badge.fury.io/js/yaspeller.svg)](http://badge.fury.io/js/yaspeller) | ||
[![Build Status](https://travis-ci.org/hcodes/yaspeller.png?branch=master)](https://travis-ci.org/hcodes/yaspeller) | ||
[![Coverage Status](https://coveralls.io/repos/hcodes/yaspeller/badge.png?branch=master)](https://coveralls.io/r/hcodes/yaspeller) | ||
[![Dependency Status](https://gemnasium.com/hcodes/yaspeller.svg)](https://gemnasium.com/hcodes/yaspeller) | ||
[![NPM version](https://img.shields.io/npm/v/yaspeller.svg)](https://www.npmjs.com/package/yaspeller) | ||
[![Build Status](https://img.shields.io/travis/hcodes/yaspeller.svg)](https://travis-ci.org/hcodes/yaspeller) | ||
[![Coverage Status](https://img.shields.io/coveralls/hcodes/yaspeller.svg)](https://coveralls.io/r/hcodes/yaspeller) | ||
[![Dependency Status](https://img.shields.io/david/hcodes/yaspeller.svg)](https://david-dm.org/hcodes/yaspeller) | ||
[![devDependency Status](https://img.shields.io/david/dev/hcodes/yaspeller.svg)](https://david-dm.org/hcodes/yaspeller#info=devDependencies) | ||
Средство поиска опечаток в текстах, в файлах и на сайтах. | ||
A tool for finding typos in text, files and websites. | ||
Используется API [Яндекс.Спеллера](https://tech.yandex.ru/speller/doc/dg/concepts/About-docpage/). | ||
## Установка | ||
Uses the [Yandex.Speller](https://tech.yandex.ru/speller/doc/dg/concepts/About-docpage/) API. | ||
## Installation | ||
`npm install yaspeller -g` | ||
## Использование в командной строке | ||
+ `yaspeller mytext.txt` - поиск опечаток в файле | ||
+ `yaspeller ./texts/` - поиск опечаток в файлах (xml, html, htm, txt, text, svg, md, wiki, js, css) в папке | ||
+ `yaspeller http://www.yandex.ru/` - поиск опечаток на сайте | ||
+ `yaspeller http://bem.info/sitemap.xml` - поиск опечаток по всему сайту | ||
## Using CLI | ||
`yaspeller [options] <file-or-directory-or-link...>` | ||
### Examples | ||
+ `yaspeller README.md` — find typos in a file. | ||
+ `yaspeller ./texts/` — find typos in the files (xml, html, htm, txt, text, svg, md, wiki) in a folder. | ||
+ `yaspeller http://www.yandex.ru/` — find typos on a page. | ||
+ `yaspeller http://bem.info/sitemap.xml` — find typos at the addresses listed in sitemap.xml. | ||
### Options | ||
#### `-f, --format <value>` | ||
Formats: `plain`, `html` or `auto`.<br/> | ||
Default: `auto`. | ||
#### `-l, --lang <value>` | ||
Languages: `en`, `kk`, `ru` or `uk`.<br/> | ||
Default: `en,ru`. | ||
#### `--file-extensions <value>` | ||
Set file extensions to search for files in a folder.<br/> | ||
Default: `md,htm,html,txt,text,svg,wiki,xhtml,xml`. | ||
#### `--dictionary <file>` | ||
JSON file for own dictionary. | ||
```JSON | ||
[ | ||
"someword1", | ||
"someword2", | ||
"someword3" | ||
] | ||
``` | ||
#### `--by-words` | ||
Do not use the surrounding words (context) during the check.<br/> | ||
This is useful when a list of individual words is passed to the service. | ||
#### `--find-repeat-words` | ||
Highlight consecutive repetitions of words. For example, `I flew to to to Cyprus`. | ||
#### `--flag-latin` | ||
Mark words written in Latin as erroneous. | ||
#### `--ignore-capitalization` | ||
Ignore the incorrect use of UPPERCASE / lowercase letters, for example, in the word `moscow`. | ||
#### `--ignore-digits` | ||
Ignore words with numbers, such as `avp17h4534`. | ||
#### `--ignore-latin` | ||
Ignore words, written in Latin, for example, `madrid`. | ||
#### `--ignore-roman-numerals` | ||
Ignore Roman numerals `I, II, III, ...`. | ||
#### `--ignore-uppercase` | ||
Ignore words written in capital letters. | ||
#### `--ignore-urls` | ||
Ignore Internet addresses, email addresses and filenames. | ||
#### `--max-requests <value>` | ||
Max count of requests at a time.<br/> | ||
Default: `2`. | ||
#### `--no-colors` | ||
Clean output without colors. | ||
#### `--only-errors` | ||
Output only errors. | ||
#### `--debug` | ||
Debug mode. | ||
## Configuration | ||
`npm install yaspeller --save-dev` | ||
Add the text in `package.json` / `scripts`:<br/> | ||
` "yaspeller": "./node_modules/.bin/yaspeller .",` | ||
To run the linter:<br/> | ||
`npm run yaspeller` | ||
Yaspeller is configured using `.yaspellerrc` JSON file at the root of the project. | ||
```JSON | ||
{ | ||
"excludeFiles": [ | ||
".git", | ||
"yaspeller", | ||
"node_modules", | ||
"libs" | ||
], | ||
"format": "auto", | ||
"lang": "ru", | ||
"fileExtensions": [ | ||
"md", | ||
"js", | ||
"css" | ||
], | ||
"dictionary": [ | ||
"someword1" | ||
] | ||
} | ||
``` | ||
| Property | Type | Details | | ||
|----------|------|---------| | ||
| `format` | `String` | [`--format`](#-f---format-value) | | ||
| `lang` | `String` | [`--lang`](#-l---lang-value) | | ||
| `excludeFiles` | `Array` | | | ||
| `fileExtensions` | `Array` | [`--file-extensions`](#--file-extensions-value) | | ||
| `dictionary` | `Array` | [`--dictionary`](#--dictionary-file) | | ||
| `byWords` | `Boolean` | [`--by-words`](#--by-words) | | ||
| `findRepeatWords` | `Boolean` | [`--find-repeat-words`](#--find-repeat-words) | | ||
| `flagLatin` | `Boolean` | [`--flag-latin`](#--flag-latin) | | ||
| `ignoreCapitalization` | `Boolean` | [`--ignore-capitalization`](#--ignore-capitalization) | | ||
| `ignoreDigits` | `Boolean` | [`--ignore-digits`](#--ignore-digits) | | ||
| `ignoreLatin` | `Boolean` | [`--ignore-latin`](#--ignore-latin) | | ||
| `ignoreRomanNumerals` | `Boolean` | [`--ignore-roman-numerals`](#--ignore-roman-numerals) | | ||
| `ignoreUppercase` | `Boolean` | [`--ignore-uppercase`](#--ignore-uppercase) | | ||
| `ignoreUrls` | `Boolean` | [`--ignore-urls`](#--ignore-urls) | | ||
| `maxRequests` | `Number` | [`--max-requests`](#--max-requests-value) | | ||
## [License](./LICENSE.md) | ||
MIT License |
162
test/test.js
@@ -5,6 +5,8 @@ var yaspeller = require('../lib/yaspeller'), | ||
url404 = 'https://raw.githubusercontent.com/asd9qi9e91ke9k2k193k19', | ||
urlGH = 'https://raw.githubusercontent.com/hcodes/yaspeller/master/test/texts/'; | ||
urlGH = 'https://raw.githubusercontent.com/hcodes/yaspeller/master/test/texts/', | ||
getFile = function(name) { | ||
return fs.readFileSync(name).toString('utf-8'); | ||
}; | ||
describe('API', function() { | ||
yaspeller.setDebug(true); | ||
it('checkFile', function(done) { | ||
@@ -47,3 +49,6 @@ yaspeller.checkFile('./test/texts/repeat_words.txt', function(err, data) { | ||
yaspeller.setFileExtensions(['txt']); | ||
yaspeller.setParams({ | ||
fileExtensions: ['txt'] | ||
}); | ||
it('checkDir', function(done) { | ||
@@ -126,10 +131,147 @@ yaspeller.checkDir('./test/texts/checkdir', function(data) { | ||
it('checkText as array', function(done) { | ||
var text = fs.readFileSync('./test/texts/repeat_words.txt').toString('utf-8'); | ||
yaspeller.checkText([text, text], function(err, data) { | ||
assert.equal(err, false); | ||
assert.equal(data.length, 4); | ||
done(); | ||
}, {lang: 'ru', format: 'plain'}); | ||
describe('Options', function() { | ||
// Checks the ignore_uppercase fixture with `ignoreUppercase` enabled:
// `err` must be false and `data` must be empty.
it('ignoreUppercase on', function(done) {
    var params = {lang: 'ru', format: 'plain', options: {ignoreUppercase: true}};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 0);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_uppercase.txt'), onChecked, params);
});
// Checks the ignore_uppercase fixture without any `options`:
// `err` must be false and `data` must hold exactly one item.
it('ignoreUppercase off', function(done) {
    var params = {lang: 'ru', format: 'plain'};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 1);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_uppercase.txt'), onChecked, params);
});
// Checks the ignore_digits fixture with `ignoreDigits` enabled:
// `err` must be false and `data` must be empty.
it('ignoreDigits on', function(done) {
    var params = {lang: 'ru', format: 'plain', options: {ignoreDigits: true}};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 0);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_digits.txt'), onChecked, params);
});
// Checks the ignore_digits fixture without any `options`:
// `err` must be false and `data` must hold exactly one item.
it('ignoreDigits off', function(done) {
    var params = {lang: 'ru', format: 'plain'};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 1);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_digits.txt'), onChecked, params);
});
// Checks the ignore_latin fixture with `ignoreLatin` enabled (lang 'en,ru'):
// `err` must be false and `data` must be empty.
it('ignoreLatin on', function(done) {
    var params = {lang: 'en,ru', format: 'plain', options: {ignoreLatin: true}};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 0);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_latin.txt'), onChecked, params);
});
// Checks the ignore_latin fixture without any `options` (lang 'en,ru'):
// `err` must be false and `data` must hold exactly one item.
it('ignoreLatin off', function(done) {
    var params = {lang: 'en,ru', format: 'plain'};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 1);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_latin.txt'), onChecked, params);
});
// Checks the ignore_urls fixture with `ignoreUrls` enabled (lang 'en,ru'):
// `err` must be false and `data` must be empty.
it('ignoreUrls on', function(done) {
    var params = {lang: 'en,ru', format: 'plain', options: {ignoreUrls: true}};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 0);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_urls.txt'), onChecked, params);
});
// Checks the ignore_urls fixture without any `options` (lang 'en,ru'):
// `err` must be false and `data` must hold exactly two items.
it('ignoreUrls off', function(done) {
    var params = {lang: 'en,ru', format: 'plain'};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 2);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_urls.txt'), onChecked, params);
});
// Checks the ignore_capitalization fixture with `ignoreCapitalization` enabled:
// `err` must be false and `data` must be empty.
it('ignoreCapitalization on', function(done) {
    var params = {lang: 'ru', format: 'plain', options: {ignoreCapitalization: true}};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 0);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_capitalization.txt'), onChecked, params);
});
// Checks the ignore_capitalization fixture without any `options`:
// `err` must be false and `data` must hold exactly one item.
it('ignoreCapitalization off', function(done) {
    var params = {lang: 'ru', format: 'plain'};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 1);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_capitalization.txt'), onChecked, params);
});
// Checks the find_repeat_words fixture with `findRepeatWords` enabled:
// `err` must be false and `data` must hold exactly one item.
it('findRepeatWords on', function(done) {
    var params = {lang: 'ru', format: 'plain', options: {findRepeatWords: true}};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 1);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/find_repeat_words.txt'), onChecked, params);
});
// Checks the find_repeat_words fixture without any `options`:
// `err` must be false and `data` must be empty.
it('findRepeatWords off', function(done) {
    var params = {lang: 'ru', format: 'plain'};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 0);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/find_repeat_words.txt'), onChecked, params);
});
// Checks the ignore_roman_numerals fixture with `ignoreRomanNumerals` enabled
// (lang 'en,ru'): `err` must be false and `data` must be empty.
it('ignoreRomanNumerals on', function(done) {
    var params = {lang: 'en,ru', format: 'plain', options: {ignoreRomanNumerals: true}};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 0);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_roman_numerals.txt'), onChecked, params);
});
// Checks the ignore_roman_numerals fixture without any `options` (lang 'en,ru'):
// `err` must be false and `data` must hold exactly one item.
it('ignoreRomanNumerals off', function(done) {
    var params = {lang: 'en,ru', format: 'plain'};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 1);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/ignore_roman_numerals.txt'), onChecked, params);
});
// Checks the flag_latin fixture with `flagLatin` enabled:
// `err` must be false and `data` must hold exactly one item.
it('flagLatin on', function(done) {
    var params = {lang: 'ru', format: 'plain', options: {flagLatin: true}};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 1);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/flag_latin.txt'), onChecked, params);
});
// Checks the flag_latin fixture without any `options`:
// `err` must be false and `data` must be empty.
it('flagLatin off', function(done) {
    var params = {lang: 'ru', format: 'plain'};

    function onChecked(err, data) {
        assert.equal(err, false);
        assert.equal(data.length, 0);
        done();
    }

    yaspeller.checkText(getFile('./test/texts/flag_latin.txt'), onChecked, params);
});
}); | ||
}); |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Unidentified License
License: (Experimental) Something that seems like a license was found, but its contents could not be matched with a known license.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
55601
34
880
141
1
70
+ Added async@0.9.0
+ Added async@0.9.0 (transitive)
+ Added commander@2.6.0 (transitive)
- Removed q@~1.1.2
- Removed commander@2.5.1 (transitive)
- Removed q@1.1.2 (transitive)
Updated commander@~2.6