Comparing version 0.8.1 to 1.0.0-pre4
@@ -5,3 +5,14 @@ { | ||
"format": "auto", | ||
"lang": "en,ru" | ||
"ignoreComments": true, | ||
"ignoreTags": [ | ||
"code", | ||
"kbd", | ||
"object", | ||
"samp", | ||
"script", | ||
"style", | ||
"var" | ||
], | ||
"lang": "en,ru", | ||
"report": ["console"] | ||
} |
/* jshint maxlen: 300 */ | ||
var request = require('request'), | ||
async = require('async'), | ||
var async = require('async'), | ||
entities = require('entities'), | ||
fs = require('fs'), | ||
isutf8 = require('isutf8'), | ||
minimatch = require('minimatch'), | ||
pth = require('path'), | ||
request = require('request'), | ||
Showdown = require('showdown'), | ||
xml2js = require('xml2js'), | ||
markdownConverter = new Showdown.converter(), | ||
printDebug = require('../lib/debug').print, | ||
YASPELLER_API_URL = 'https://speller.yandex.net/services/spellservice.json/checkText', | ||
params = { | ||
excludeFiles: [], | ||
fileExtensions: [], | ||
maxRequests: 2 | ||
}; | ||
YASPELLER_API_CHECKTEXT = 'https://speller.yandex.net/services/spellservice.json/checkText', | ||
MAX_LEN_TEXT = 10000; // Max length of text for Yandex.Speller API | ||
@@ -21,4 +18,15 @@ function isHTML(text) { | ||
/**
 * Heuristically detect whether a text looks like Markdown.
 *
 * The text is reported as Markdown as soon as one of the markers below
 * is found anywhere in it.
 *
 * @param {string} text
 * @returns {boolean} true if at least one Markdown marker is present
 */
function isMarkdown(text) {
    return [
        /^===/m, // Line starting with "===" (heading underline)
        /^```/m, // Line starting with a code fence
        /-- ?:?\|/, // Table delimiter row
        /\]\((https?|mailto):/, // Inline link: [text](http://...), [text](mailto:...)
        /\)\[(https?|mailto):/ // Reversed form matched by earlier versions, kept for compatibility
    ].some(function(el) {
        return text.search(el) !== -1;
    });
}
function getFormat(text, format) { | ||
if(format === 'html' || format === 'plain'){ | ||
if(['html', 'markdown', 'plain'].indexOf(format) !== -1) { | ||
return format; | ||
@@ -28,3 +36,3 @@ } | ||
if(format === 'auto' || !format) { | ||
return isHTML(text) ? 'html' : 'plain'; | ||
return isMarkdown(text) ? 'markdown' : (isHTML(text) ? 'html' : 'plain'); | ||
} | ||
@@ -35,2 +43,6 @@ | ||
/**
 * Map an internal text format to the format value sent to the speller API.
 * Only 'html' is passed through; any other value becomes 'plain'.
 *
 * @param {string} format
 * @returns {string} 'html' or 'plain'
 */
function getApiFormat(format) {
    if(format === 'html') {
        return format;
    }

    return 'plain';
}
function getOptions(options) { | ||
@@ -62,54 +74,43 @@ var result = 0, | ||
function prepareText(text, format) { | ||
if(format === 'html') { | ||
text = text.replace(/<\/?[^>]+>/g, ' '); // strip html tags | ||
} | ||
return text.replace(/\r\n/g, '\n') // fix Windows | ||
.replace(/\r/g, '\n') // fix MacOS | ||
.replace(/\s+\n/g, '\n') // trailling spaces | ||
.replace(/\s+/g, ' ') // repeat spaces | ||
.replace(/\n+/g, '\n') | ||
return text.replace(/\r\n/g, '\n') // Fix Windows | ||
.replace(/\r/g, '\n') // Fix MacOS | ||
.replace(/\s+\n/g, '\n') // Trailling spaces | ||
.replace(/\s+/g, ' ') // Repeat spaces | ||
.replace(/\n+/g, '\n') // Repeat line ends | ||
.trim(); | ||
} | ||
function isExcludedFile(file) { | ||
file = pth.resolve(file); | ||
for(var i = 0; i < params.excludeFiles.length; i++) { | ||
if(minimatch(file, pth.resolve('.', params.excludeFiles[i]), {dot: true})) { | ||
return true; | ||
} | ||
} | ||
return false; | ||
/**
 * Replace comments and other non-content markup sections with a space.
 *
 * Handles HTML comments, ENTITY and DOCTYPE declarations, XML declarations
 * and CDATA sections. Matching is case-insensitive, non-greedy and may
 * span several lines.
 *
 * @param {string} text
 * @returns {string} text with every matched section replaced by ' '
 */
function ignoreComments(text) {
    // Each pair is [opening, closing], already escaped for use in a RegExp.
    var delimiters = [
        ['<!--', '-->'],
        ['<!ENTITY', '>'],
        ['<!DOCTYPE', '>'],
        ['<\\?xml', '\\?>'],
        ['<!\\[CDATA\\[', '\\]\\]>']
    ];

    return delimiters.reduce(function(result, pair) {
        // "[^]" matches any character, including line breaks.
        var pattern = new RegExp(pair[0] + '[^]*?' + pair[1], 'gi');

        return result.replace(pattern, ' ');
    }, text);
}
function findFiles(dir) { | ||
var res = [], | ||
isDir = function(dir) { | ||
return fs.statSync(dir).isDirectory(); | ||
}, | ||
find = function(path) { | ||
var files = fs.readdirSync(path); | ||
files.forEach(function(el) { | ||
var file = pth.join(path, el), | ||
ext = pth.extname(file).toLowerCase(); | ||
if(isExcludedFile(file)) { | ||
return; | ||
} | ||
function ignoreTags(text, tags) { | ||
var bufTags = []; | ||
tags.forEach(function(tag) { | ||
bufTags.push(['<' + tag + '(\\s[^>]*?)?>', '</' + tag + '>']); | ||
}, this); | ||
if(isDir(file)) { | ||
find(file); | ||
} else if(!params.fileExtensions.length || params.fileExtensions.indexOf(ext) !== -1) { | ||
res.push(file); | ||
} | ||
}); | ||
}; | ||
bufTags.forEach(function(tag) { | ||
var re = new RegExp(tag[0] + '[^]*?' + tag[1], 'gi'); | ||
text = text.replace(re, ' '); | ||
}); | ||
if(isDir(dir)) { | ||
find(dir); | ||
} else { | ||
res.push(dir); | ||
} | ||
return text; | ||
} | ||
return res; | ||
/**
 * Get the maximum number of simultaneous requests.
 *
 * Reads `maxRequest` first and falls back to `maxRequests`, the spelling
 * used in the configuration file. A missing or falsy value gives 2.
 *
 * @param {Object} [settings]
 * @param {number} [settings.maxRequest]
 * @param {number} [settings.maxRequests]
 * @returns {number}
 */
function getMaxRequest(settings) {
    var limit = settings && (settings.maxRequest || settings.maxRequests);

    return limit || 2;
}
@@ -141,6 +142,20 @@ | ||
var format = getFormat(text, settings.format || 'plain'), | ||
var format = getFormat(text, settings.format), | ||
options = getOptions(settings.options), | ||
lang = settings.lang || 'en,ru'; | ||
if(format === 'html' || format === 'markdown') { | ||
if(format === 'markdown') { | ||
text = markdownConverter.makeHtml(text); | ||
} | ||
if(settings.ignoreTags) { | ||
text = ignoreTags(text, settings.ignoreTags); | ||
} | ||
text = ignoreComments(text); | ||
text = text.replace(/<\/?[a-z][^>]+>/gi, ' '); // Strip html tags | ||
text = entities.decodeHTML(text); | ||
} | ||
text = prepareText(text, format); | ||
@@ -153,17 +168,19 @@ | ||
var tasks = [], | ||
texts = splitText(text); | ||
texts = splitText(text), | ||
apiFormat = getApiFormat(format); | ||
texts.forEach(function(el, i) { | ||
printDebug({ | ||
request: i, | ||
format: format, | ||
apiFormat: apiFormat, | ||
lang: lang, | ||
options: options, | ||
text: el.length > 64 ? el.substr(0, 64) + '...' : el | ||
text: el.length > 128 ? el.substr(0, 128) + '...' : el | ||
}); | ||
tasks.push(function(cb) { | ||
request.post(YASPELLER_API_URL, { | ||
request.post(YASPELLER_API_CHECKTEXT, { | ||
form: { | ||
format: format, | ||
format: apiFormat, | ||
lang: lang, | ||
@@ -183,3 +200,3 @@ options: options, | ||
async.parallelLimit(tasks, params.maxRequests, function(err, data) { | ||
async.parallelLimit(tasks, getMaxRequest(settings), function(err, data) { | ||
var buf = mergeResults(data); | ||
@@ -191,4 +208,3 @@ callback(buf.err, buf.data); | ||
function splitText(text) { | ||
var MAX_LEN = 10000, // Max length of text for Yandex.Speller API | ||
texts = [], | ||
var texts = [], | ||
pos = 0, | ||
@@ -198,7 +214,7 @@ newPos = 0; | ||
while(pos < text.length) { | ||
if(pos + MAX_LEN >= text.length) { | ||
if(pos + MAX_LEN_TEXT >= text.length) { | ||
texts.push(text.substring(pos)); | ||
break; | ||
} else { | ||
newPos = getPosition(text, pos + MAX_LEN); | ||
newPos = getPosition(text, pos + MAX_LEN_TEXT); | ||
texts.push(text.substring(pos, newPos)); | ||
@@ -213,3 +229,4 @@ pos = newPos; | ||
function getPosition(text, start) { | ||
for(var i = start - 1; i >= start - 500; i--) { | ||
var depth = 500; | ||
for(var i = start - 1; i >= start - depth; i--) { | ||
var sym = text[i]; | ||
@@ -308,37 +325,2 @@ if(sym === ' ' || sym === '\n' || sym === '\t') { | ||
/**
 * Check the files found under a path for typos.
 *
 * If the path does not exist, both callbacks receive an error entry
 * and no file is checked.
 *
 * @param {string} dir File or directory path
 * @param {Function} commonCallback Called once with the list of [err, data] entries
 * @param {Object} [settings] See {@tutorial settings}
 * @param {Function} [callback] Called with (err, data) for each file
 */
function checkDir(dir, commonCallback, settings, callback) {
    settings = settings || {};

    if(!fs.existsSync(dir)) {
        var failure = [true, Error(dir + ': file or directory is not exists')];

        if(callback) {
            callback.apply(this, failure);
        }

        commonCallback([failure]);

        return;
    }

    var tasks = findFiles(dir).map(function(file) {
        return function(done) {
            checkFile(file, function(err, data) {
                if(callback) {
                    callback(err, data);
                }

                // Always report success to async so one failing file does not stop the rest.
                done(false, [err, data]);
            }, settings);
        };
    });

    async.parallelLimit(tasks, params.maxRequests, function(err, results) {
        commonCallback(results);
    });
}
/** | ||
* Check text on pages of sitemap.xml | ||
@@ -392,3 +374,3 @@ * | ||
async.parallelLimit(tasks, params.maxRequests, function(err, data) { | ||
async.parallelLimit(tasks, getMaxRequest(settings), function(err, data) { | ||
commonCallback(data); | ||
@@ -401,13 +383,6 @@ }); | ||
module.exports = { | ||
setParams: function(p) { | ||
Object.keys(p).forEach(function(key) { | ||
params[key] = p[key]; | ||
}); | ||
}, | ||
isExcludedFile: isExcludedFile, | ||
checkFile: checkFile, | ||
checkSitemap: checkSitemap, | ||
checkText: checkText, | ||
checkFile: checkFile, | ||
checkDir: checkDir, | ||
checkUrl: checkUrl, | ||
checkSitemap: checkSitemap | ||
checkUrl: checkUrl | ||
}; |
@@ -8,5 +8,8 @@ { | ||
"name": "yaspeller", | ||
"main": "index.js", | ||
"main": "./lib/yaspeller.js", | ||
"bin": { | ||
"yaspeller": "./bin/yaspeller" | ||
}, | ||
"description": "Search tool typos in the text, files and websites", | ||
"version": "0.8.1", | ||
"version": "1.0.0pre4", | ||
"license": "MIT", | ||
@@ -22,19 +25,30 @@ "homepage": "https://github.com/hcodes/yaspeller", | ||
"keywords": [ | ||
"typos", "text", "опечатки", "текст", "Яндекс.Спеллер", "Yandex.Speller" | ||
"typo", | ||
"typos", | ||
"text", | ||
"опечатки", | ||
"текст", | ||
"yandex", | ||
"speller", | ||
"Яндекс.Спеллер", | ||
"Yandex.Speller" | ||
], | ||
"dependencies": { | ||
"async": "0.9.0", | ||
"chalk": "0.5.1", | ||
"commander": "~2.6", | ||
"entities": "^1.1.1", | ||
"isutf8": "~1.0", | ||
"lodash": "^3.1.0", | ||
"minimatch": "~2.0.1", | ||
"request": "~2.x", | ||
"isutf8": "~1.0", | ||
"xml2js": "~0.4", | ||
"chalk": "0.5.1", | ||
"minimatch": "~2.0.1" | ||
"showdown": "^0.3.1", | ||
"xml2js": "~0.4" | ||
}, | ||
"devDependencies": { | ||
"mocha": "2.1.x", | ||
"chai": "1.x", | ||
"istanbul": "0.3.x", | ||
"jscs": "1.10.x", | ||
"jshint": "2.x" | ||
"jscs": "1.11.x", | ||
"jshint": "2.x", | ||
"mocha": "2.1.x" | ||
}, | ||
@@ -51,11 +65,7 @@ "optionalDependencies": {}, | ||
"unit-test": "./node_modules/.bin/mocha -u bdd -R spec --recursive test", | ||
"unit-test-coverage": "./node_modules/.bin/istanbul cover ./node_modules/mocha/bin/_mocha -- -u bdd -R spec --recursive test" | ||
"unit-test-coverage": "./node_modules/.bin/istanbul cover ./node_modules/mocha/bin/_mocha -- -u bdd -R spec --recursive test" | ||
}, | ||
"bin": { | ||
"yaspeller": "./bin/cli.js" | ||
}, | ||
"files": [ | ||
"bin", | ||
"lib", | ||
"index.js", | ||
".yaspellerrc.default.json", | ||
@@ -62,0 +72,0 @@ "LICENSE.md" |
@@ -9,2 +9,4 @@ yaspeller | ||
This README is also available [in Russian](./README.ru.md). | ||
A tool for finding typos in text, files and websites. | ||
@@ -14,2 +16,4 @@ | ||
![yaspeller](https://raw.githubusercontent.com/hcodes/yaspeller/master/images/cli.png) | ||
## Installation | ||
@@ -23,3 +27,3 @@ `npm install yaspeller -g` | ||
+ `yaspeller README.md` — search typos in the file. | ||
+ `yaspeller ./texts/` — finding typos in files (.xml, .html, .htm, .txt, .text, .svg, .md, .wiki) in the folder. | ||
+ `yaspeller -e ".md,.html,.js" ./texts/` — finding typos in files in the folder. | ||
+ `yaspeller http://www.yandex.ru/` — search typos in the page. | ||
@@ -31,7 +35,7 @@ + `yaspeller http://bem.info/sitemap.xml` — search typos at the addresses specified in the sitemap.xml. | ||
#### `-f, --format <value>` | ||
Formats: `plain` or `html`. | ||
Default: `plain`. | ||
Formats: `plain`, `html`, `markdown` or `auto`.<br/> | ||
Default: `auto`. | ||
#### `-l, --lang <value>` | ||
Languages: `en`, `kk`, `ru` or `uk`.<br/> | ||
Languages: `en`, `ru` or `uk`.<br/> | ||
Default: `en,ru`. | ||
@@ -42,3 +46,3 @@ | ||
#### `--file-extensions <value>` | ||
#### `-e, --file-extensions <value>` | ||
Set file extensions to search for files in a folder.<br/> | ||
@@ -57,2 +61,7 @@ Example: `.md,.htm,.txt`. | ||
#### `--report <type>` | ||
Set type of report: `console`, `html` or `json`.<br/> | ||
Default: `console`<br/> | ||
Example: `console,html,custom_report.js` | ||
#### `--by-words` | ||
@@ -68,2 +77,6 @@ Do not use a dictionary environment (context) during the scan.<br/> | ||
#### `--ignore-tags <tags>` | ||
Ignore HTML tags.<br/> | ||
Default: `code,kbd,object,samp,script,style,var` | ||
#### `--ignore-capitalization` | ||
@@ -114,7 +127,6 @@ Ignore the incorrect use of UPPERCASE / lowercase letters, for example, in the word `moscow`. | ||
".git", | ||
"yaspeller", | ||
"libs", | ||
"node_modules", | ||
"libs" | ||
"yaspeller" | ||
], | ||
"format": "auto", | ||
"lang": "ru", | ||
@@ -132,2 +144,29 @@ "fileExtensions": [ | ||
**Advanced example:** | ||
```JSON | ||
{ | ||
"excludeFiles": [ | ||
".git", | ||
"yaspeller", | ||
"node_modules", | ||
"libs" | ||
], | ||
"format": "html", | ||
"lang": "en", | ||
"fileExtensions": [ | ||
".md", | ||
".js", | ||
".css" | ||
], | ||
"report": ["console", "html"], | ||
"dictionary": [ | ||
"someword1" | ||
], | ||
"ignoreTags": ["code", "script"], | ||
"ignoreUrls": true, | ||
"findRepeatWords": true, | ||
"maxRequests": 5 | ||
} | ||
``` | ||
| Property | Type | Details | | ||
@@ -140,5 +179,7 @@ |----------|------|---------| | ||
| `dictionary` | `Array` | [`--dictionary`](#--dictionary-file) | | ||
| `report` | `Array` | [`--report`](#--report-type) | | ||
| `byWords` | `Boolean` | [`--by-words`](#--by-words) | | ||
| `findRepeatWords` | `Boolean` | [`--find-repeat-words`](#--find-repeat-words) | | ||
| `flagLatin` | `Boolean` | [`--flag-latin`](#--flag-latin) | | ||
| `ignoreTags` | `Array` | [`--ignore-tags`](#--ignore-tags-tags) | | ||
| `ignoreCapitalization` | `Boolean` | [`--ignore-capitalization`](#--ignore-capitalization) | | ||
@@ -145,0 +186,0 @@ | `ignoreDigits` | `Boolean` | [`--ignore-digits`](#--ignore-digits) | |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Manifest confusion
Supply chain riskThis package has inconsistent metadata. This could be malicious or caused by an error when publishing the package.
Found 1 instance in 1 package
Dynamic require
Supply chain riskDynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
42093
15
889
185
10
6
2
+ Addedentities@^1.1.1
+ Addedlodash@^3.1.0
+ Addedshowdown@^0.3.1
+ Addedentities@1.1.2(transitive)
+ Addedlodash@3.10.1(transitive)
+ Addedshowdown@0.3.4(transitive)