horseman-article-parser
Advanced tools
Comparing version 0.0.2 to 0.0.3
{ | ||
"name": "horseman-article-parser", | ||
"version": "0.0.2", | ||
"version": "0.0.3", | ||
"description": "Web Page Inspection Tool. Sentiment Analysis, Keyword Extraction, Named Entity Recognition & Spell Check", | ||
"main": "index.js", | ||
"scripts": { | ||
"test": "echo \"Error: no test specified\" && exit 1" | ||
"lint": "./node_modules/.bin/eslint index.js --fix" | ||
}, | ||
@@ -20,4 +20,28 @@ "repository": { | ||
"dependencies": { | ||
"user": "0.0.0" | ||
"clean-html": "^1.5.0", | ||
"compromise": "^11.12.0", | ||
"dictionary-en-gb": "^2.0.0", | ||
"html-to-text": "^4.0.0", | ||
"htmldom": "^3.0.6", | ||
"lodash": "^4.17.4", | ||
"node-horseman": "^3.3.0", | ||
"node-readability": "^2.2.0", | ||
"phantomjs-prebuilt": "^2.1.14", | ||
"retext": "^5.0.0", | ||
"retext-keywords": "^4.0.0", | ||
"retext-spell": "^2.3.1", | ||
"sentiment": "^5.0.1", | ||
"vfile-reporter-json": "^1.0.2" | ||
}, | ||
"engines": { | ||
"yarn": ">= 1.0.0" | ||
}, | ||
"devDependencies": { | ||
"eslint": "^5.6.0", | ||
"eslint-config-standard": "^12.0.0", | ||
"eslint-plugin-import": "^2.14.0", | ||
"eslint-plugin-node": "^7.0.1", | ||
"eslint-plugin-promise": "^4.0.1", | ||
"eslint-plugin-standard": "^4.0.0" | ||
} | ||
} | ||
} |
@@ -1,1 +0,71 @@ | ||
# horseman-article-parser | ||
# Horseman Article Parser | ||
A web page article parser which returns an object containing the article's formatted text & other attributes including sentiment, keyphrases, people, places, organisations and spelling suggestions. | ||
### Prerequisites | ||
Node.js & NPM | ||
### Install | ||
``` | ||
npm install horseman-article-parser --save | ||
``` | ||
### Usage Example | ||
``` | ||
var parser = require('horseman-article-parser'); | ||
var params = { | ||
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36', | ||
config: { timeout: 10000, cookies: './cookies.json', bluebirdDebug: false, injectJquery: true }, | ||
url: "https://www.theguardian.com/politics/2018/sep/24/theresa-may-calls-for-immigration-based-on-skills-and-wealth" | ||
} | ||
parser.parseArticle(params) | ||
.then(function (article) { | ||
var response = { | ||
title: article.title.text, | ||
metadescription: article.meta.description.text, | ||
url: article.url, | ||
sentiment: article.sentiment, | ||
keyphrases: article.processed.keyphrases, | ||
people: article.people, | ||
orgs: article.orgs, | ||
places: article.places, | ||
text: { | ||
formatted: article.processed.formattedText, | ||
html: article.processed.html | ||
}, | ||
image: article.meta['og:image'], | ||
screenshot: article.mobile, | ||
spelling: article.spelling | ||
} | ||
console.log(response); | ||
}) | ||
.catch(function (error) { | ||
console.log(error.message) | ||
console.log(error.stack); | ||
}) | ||
``` | ||
## Development | ||
Please feel free to fork the repo or open pull requests to the development branch. I've used [eslint](https://eslint.org/) for linting & [yarn](https://yarnpkg.com/en/) for dependency management. | ||
Build the dependencies with: | ||
``` | ||
yarn | ||
``` | ||
Lint the index.js file with: | ||
``` | ||
yarn lint | ||
``` | ||
## License | ||
This project is licensed under the GNU GENERAL PUBLIC LICENSE Version 3 - see the [LICENSE.md](LICENSE.md) file for details. |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Empty package
Supply chain risk: Package does not contain any code. It may have been removed, may be name squatting, or may be the result of a faulty package publish.
Found 1 instance in 1 package
No tests
Quality: Package does not have any tests. This is a strong signal of a poorly maintained or low-quality package.
Found 1 instance in 1 package
124836
7
394
1
71
14
6
+ Addedclean-html@^1.5.0
+ Addedcompromise@^11.12.0
+ Addeddictionary-en-gb@^2.0.0
+ Addedhtml-to-text@^4.0.0
+ Addedhtmldom@^3.0.6
+ Addedlodash@^4.17.4
+ Addednode-horseman@^3.3.0
+ Addednode-readability@^2.2.0
+ Addedphantomjs-prebuilt@^2.1.14
+ Addedretext@^5.0.0
+ Addedretext-keywords@^4.0.0
+ Addedretext-spell@^2.3.1
+ Addedsentiment@^5.0.1
+ Addedvfile-reporter-json@^1.0.2
+ Added@isaacs/cliui@8.0.2(transitive)
+ Added@one-ini/wasm@0.1.1(transitive)
+ Added@pkgjs/parseargs@0.11.0(transitive)
+ Addedabbrev@2.0.0(transitive)
+ Addedacorn@2.7.0(transitive)
+ Addedacorn-globals@1.0.9(transitive)
+ Addedajv@6.12.6(transitive)
+ Addedalign-text@0.1.4(transitive)
+ Addedansi-regex@5.0.16.0.1(transitive)
+ Addedansi-styles@4.3.06.2.1(transitive)
+ Addedarray-iterate@1.1.4(transitive)
+ Addedasn1@0.1.110.2.6(transitive)
+ Addedassert-plus@0.1.51.0.0(transitive)
+ Addedasync@0.9.2(transitive)
+ Addedasynckit@0.4.0(transitive)
+ Addedaws-sign2@0.5.00.7.0(transitive)
+ Addedaws4@1.13.0(transitive)
+ Addedbail@1.0.5(transitive)
+ Addedbalanced-match@1.0.2(transitive)
+ Addedbcrypt-pbkdf@1.0.2(transitive)
+ Addedbluebird@3.7.2(transitive)
+ Addedboom@0.4.2(transitive)
+ Addedbrace-expansion@2.0.1(transitive)
+ Addedbrowser-request@0.3.3(transitive)
+ Addedbuffer-crc32@0.2.13(transitive)
+ Addedbuffer-from@1.1.2(transitive)
+ Addedcamelcase@1.2.1(transitive)
+ Addedcaseless@0.12.0(transitive)
+ Addedcenter-align@0.1.3(transitive)
+ Addedclean-html@1.5.0(transitive)
+ Addedcliui@2.1.0(transitive)
+ Addedclone@1.0.4(transitive)
+ Addedcolor-convert@2.0.1(transitive)
+ Addedcolor-name@1.1.4(transitive)
+ Addedcombined-stream@0.0.71.0.8(transitive)
+ Addedcommander@10.0.1(transitive)
+ Addedcompromise@11.14.3(transitive)
+ Addedconcat-stream@1.6.2(transitive)
+ Addedconfig-chain@1.1.13(transitive)
+ Addedcookies.txt@0.1.2(transitive)
+ Addedcore-util-is@1.0.21.0.3(transitive)
+ Addedcross-spawn@7.0.3(transitive)
+ Addedcryptiles@0.2.2(transitive)
+ Addedcssdom@1.0.23(transitive)
+ Addedcssom@0.3.8(transitive)
+ Addedcssstyle@0.2.37(transitive)
+ Addedctype@0.5.3(transitive)
+ Addeddashdash@1.14.1(transitive)
+ Addeddata-uri-to-buffer@0.0.4(transitive)
+ Addeddebug@2.6.9(transitive)
+ Addeddecamelize@1.2.0(transitive)
+ Addeddeep-is@0.1.4(transitive)
+ Addeddefaults@1.0.4(transitive)
+ Addeddelayed-stream@0.0.51.0.0(transitive)
+ Addeddictionary-en-gb@2.4.0(transitive)
+ Addeddom-serializer@0.2.2(transitive)
+ Addeddomelementtype@1.3.12.3.0(transitive)
+ Addeddomhandler@2.4.2(transitive)
+ Addeddomutils@1.7.0(transitive)
+ Addedeastasianwidth@0.2.0(transitive)
+ Addedecc-jsbn@0.1.2(transitive)
+ Addededitorconfig@1.0.4(transitive)
+ Addedefrt-unpack@2.2.0(transitive)
+ Addedemoji-regex@8.0.09.2.2(transitive)
+ Addedencoding@0.1.13(transitive)
+ Addedentities@1.1.22.2.0(transitive)
+ Addedes6-promise@4.2.8(transitive)
+ Addedescodegen@1.14.3(transitive)
+ Addedesprima@4.0.1(transitive)
+ Addedestraverse@4.3.0(transitive)
+ Addedesutils@2.0.3(transitive)
+ Addedextend@3.0.2(transitive)
+ Addedextract-zip@1.7.0(transitive)
+ Addedextsprintf@1.3.0(transitive)
+ Addedfast-deep-equal@3.1.3(transitive)
+ Addedfast-json-stable-stringify@2.1.0(transitive)
+ Addedfast-levenshtein@2.0.6(transitive)
+ Addedfd-slicer@1.1.0(transitive)
+ Addedforeground-child@3.1.1(transitive)
+ Addedforever-agent@0.5.20.6.1(transitive)
+ Addedform-data@0.1.42.3.3(transitive)
+ Addedfs-extra@1.0.0(transitive)
+ Addedgetpass@0.1.7(transitive)
+ Addedglob@10.4.1(transitive)
+ Addedgraceful-fs@4.2.11(transitive)
+ Addedhar-schema@2.0.0(transitive)
+ Addedhar-validator@5.1.5(transitive)
+ Addedhasha@2.2.0(transitive)
+ Addedhawk@1.1.1(transitive)
+ Addedhe@1.2.0(transitive)
+ Addedhoek@0.9.1(transitive)
+ Addedhtml-to-text@4.0.0(transitive)
+ Addedhtmldom@3.0.9(transitive)
+ Addedhtmlparser2@3.10.1(transitive)
+ Addedhttp-signature@0.10.11.2.0(transitive)
+ Addediconv-lite@0.6.3(transitive)
+ Addedinherits@2.0.4(transitive)
+ Addedini@1.3.8(transitive)
+ Addedis-buffer@1.1.62.0.5(transitive)
+ Addedis-fullwidth-code-point@3.0.0(transitive)
+ Addedis-plain-obj@1.1.0(transitive)
+ Addedis-stream@1.1.0(transitive)
+ Addedis-typedarray@1.0.0(transitive)
+ Addedisarray@1.0.0(transitive)
+ Addedisexe@2.0.0(transitive)
+ Addedisstream@0.1.2(transitive)
+ Addedjackspeak@3.4.0(transitive)
+ Addedjs-beautify@1.15.1(transitive)
+ Addedjs-cookie@3.0.5(transitive)
+ Addedjsbn@0.1.1(transitive)
+ Addedjsdom@6.5.1(transitive)
+ Addedjson-schema@0.4.0(transitive)
+ Addedjson-schema-traverse@0.4.1(transitive)
+ Addedjson-stringify-safe@5.0.1(transitive)
+ Addedjsonfile@2.4.0(transitive)
+ Addedjsprim@1.4.2(transitive)
+ Addedkew@0.7.0(transitive)
+ Addedkind-of@3.2.2(transitive)
+ Addedklaw@1.3.1(transitive)
+ Addedlazy-cache@1.0.4(transitive)
+ Addedlevn@0.3.0(transitive)
+ Addedlodash@4.17.21(transitive)
+ Addedlodash.includes@4.3.0(transitive)
+ Addedlongest@1.0.1(transitive)
+ Addedlru-cache@10.2.2(transitive)
+ Addedmime@1.2.11(transitive)
+ Addedmime-db@1.52.0(transitive)
+ Addedmime-types@1.0.22.1.35(transitive)
+ Addedminimatch@9.0.19.0.4(transitive)
+ Addedminimist@0.0.101.2.8(transitive)
+ Addedminipass@7.1.2(transitive)
+ Addedmkdirp@0.5.6(transitive)
+ Addedms@2.0.0(transitive)
+ Addednlcst-is-literal@1.2.2(transitive)
+ Addednlcst-to-string@2.0.4(transitive)
+ Addednode-horseman@3.3.0(transitive)
+ Addednode-phantom-simple@2.2.4(transitive)
+ Addednode-readability@2.2.0(transitive)
+ Addednode-uuid@1.4.8(transitive)
+ Addednopt@7.2.1(transitive)
+ Addednspell@2.1.5(transitive)
+ Addednwmatcher@1.4.4(transitive)
+ Addedoauth-sign@0.3.00.9.0(transitive)
+ Addedoptimist@0.6.1(transitive)
+ Addedoptionator@0.8.3(transitive)
+ Addedparse-latin@4.3.0(transitive)
+ Addedparse5@1.5.1(transitive)
+ Addedpath-key@3.1.1(transitive)
+ Addedpath-scurry@1.11.1(transitive)
+ Addedpend@1.2.0(transitive)
+ Addedperformance-now@2.1.0(transitive)
+ Addedphantomjs-prebuilt@2.1.16(transitive)
+ Addedpinkie@2.0.4(transitive)
+ Addedpinkie-promise@2.0.1(transitive)
+ Addedpos@0.4.2(transitive)
+ Addedprelude-ls@1.1.2(transitive)
+ Addedprocess-nextick-args@2.0.1(transitive)
+ Addedprogress@1.1.8(transitive)
+ Addedproto-list@1.2.4(transitive)
+ Addedpsl@1.9.0(transitive)
+ Addedpunycode@2.3.1(transitive)
+ Addedqs@1.0.26.5.3(transitive)
+ Addedquotation@1.1.3(transitive)
+ Addedreadable-stream@2.3.83.6.2(transitive)
+ Addedrepeat-string@1.6.1(transitive)
+ Addedreplace-ext@1.0.0(transitive)
+ Addedrequest@2.40.02.88.2(transitive)
+ Addedrequest-progress@2.0.1(transitive)
+ Addedretext@5.0.0(transitive)
+ Addedretext-keywords@4.0.3(transitive)
+ Addedretext-latin@2.0.4(transitive)
+ Addedretext-pos@2.0.2(transitive)
+ Addedretext-spell@2.4.1(transitive)
+ Addedretext-stringify@2.0.4(transitive)
+ Addedright-align@0.1.3(transitive)
+ Addedsafe-buffer@5.1.25.2.1(transitive)
+ Addedsafer-buffer@2.1.2(transitive)
+ Addedsemver@7.6.2(transitive)
+ Addedsentiment@5.0.2(transitive)
+ Addedshebang-command@2.0.0(transitive)
+ Addedshebang-regex@3.0.0(transitive)
+ Addedsignal-exit@4.1.0(transitive)
+ Addedsntp@0.2.4(transitive)
+ Addedsource-map@0.5.70.6.1(transitive)
+ Addedsshpk@1.18.0(transitive)
+ Addedstemmer@1.0.5(transitive)
+ Addedstring-width@4.2.35.1.2(transitive)
+ Addedstring_decoder@1.1.1(transitive)
+ Addedstringstream@0.0.6(transitive)
+ Addedstrip-ansi@6.0.17.1.0(transitive)
+ Addedsymbol-tree@3.2.4(transitive)
+ Addedthrottleit@1.0.1(transitive)
+ Addedtough-cookie@2.5.0(transitive)
+ Addedtr46@0.0.3(transitive)
+ Addedtrough@1.0.5(transitive)
+ Addedtunnel-agent@0.4.30.6.0(transitive)
+ Addedtweetnacl@0.14.5(transitive)
+ Addedtype-check@0.3.2(transitive)
+ Addedtypedarray@0.0.6(transitive)
+ Addeduglify-js@2.8.29(transitive)
+ Addeduglify-to-browserify@1.0.2(transitive)
+ Addedunherit@1.1.3(transitive)
+ Addedunified@6.2.0(transitive)
+ Addedunist-util-is@3.0.0(transitive)
+ Addedunist-util-modify-children@2.0.0(transitive)
+ Addedunist-util-stringify-position@1.1.2(transitive)
+ Addedunist-util-visit@1.4.1(transitive)
+ Addedunist-util-visit-children@1.1.4(transitive)
+ Addedunist-util-visit-parents@2.1.2(transitive)
+ Addeduri-js@4.4.1(transitive)
+ Addedutil-deprecate@1.0.2(transitive)
+ Addedutils-extend@1.0.8(transitive)
+ Addeduuid@3.4.0(transitive)
+ Addedverror@1.10.0(transitive)
+ Addedvfile@2.3.0(transitive)
+ Addedvfile-message@1.1.1(transitive)
+ Addedvfile-reporter-json@1.0.4(transitive)
+ Addedwhatwg-url-compat@0.6.5(transitive)
+ Addedwhich@1.3.12.0.2(transitive)
+ Addedwindow-size@0.1.0(transitive)
+ Addedword-wrap@1.2.5(transitive)
+ Addedwordwrap@0.0.20.0.3(transitive)
+ Addedwrap-ansi@7.0.08.1.0(transitive)
+ Addedx-is-string@0.1.0(transitive)
+ Addedxml-name-validator@2.0.1(transitive)
+ Addedxmlhttprequest@1.8.0(transitive)
+ Addedxtend@4.0.2(transitive)
+ Addedyargs@3.10.0(transitive)
+ Addedyauzl@2.10.0(transitive)
- Removeduser@0.0.0
- Removeduser@0.0.0(transitive)