mediawiki-title
Advanced tools
Comparing version 0.1.2 to 0.2.0
134
lib/index.js
"use strict"; | ||
var P = require('bluebird'); | ||
var preq = require('preq'); | ||
var sanitizeIP = require('./ip'); | ||
@@ -25,9 +23,24 @@ var utils = require('./utils'); | ||
/** | ||
* Information about a Wikimedia site that is required for correct | ||
* title normalization. | ||
* | ||
* @typedef SiteInfo | ||
* @type Object | ||
* @property {string} lang Site language code. | ||
* @property {string} legaltitlechars A perl-like regex for characters | ||
* allowed in the page title. | ||
* @property {Object} namespaces Site namespaces info in the same format as | ||
* returned by PHP api. | ||
* @property {Object} namespacealiases Site namespace aliases in the same format | ||
* as returned by PHP api. | ||
*/ | ||
/** | ||
* Creates an instance of title normalizer. | ||
* | ||
* @param {Object} options the normalizer options | ||
* @param {Function(string):string} options.apiURI a function | ||
* @param {Function(string):Promise(SiteInfo)} options.getSiteInfo a function | ||
* that takes a domain string and returns back | ||
* an API URI that needs to be contacted to get | ||
* the site information used for normalization. | ||
* the information about the site. The info is cached | ||
* in-memory, so there's no need for external caching. | ||
* @constructor | ||
@@ -37,3 +50,3 @@ * @public | ||
function Normalizer(options) { | ||
if (!options || typeof options.apiURI !== 'function') { | ||
if (!options || typeof options.getSiteInfo !== 'function') { | ||
throw new TypeError('Invalid options for Normalizer constructor'); | ||
@@ -67,23 +80,15 @@ } | ||
if (self._siteInfo[domain]) { | ||
return P.resolve(self._siteInfo[domain]); | ||
return self._siteInfo[domain]; | ||
} else { | ||
return preq.post({ | ||
uri: self._options.apiURI(domain), | ||
body: { | ||
action: 'query', | ||
meta: 'siteinfo', | ||
siprop: 'general|namespaces|namespacealiases', | ||
format: 'json' | ||
} | ||
}) | ||
.then(function(res) { | ||
res = res.body.query; | ||
self._siteInfo[domain] = { | ||
lang: res.general.lang, | ||
invalidTitleRegex: self._createInvalidTitleRegex(res.general.legaltitlechars), | ||
namespaces: res.namespaces, | ||
namespacealiases: res.namespacealiases | ||
self._siteInfo[domain] = self._options.getSiteInfo(domain) | ||
.then(function(info) { | ||
return { | ||
lang: info.lang, | ||
invalidTitleRegex: self._createInvalidTitleRegex(info.legaltitlechars), | ||
namespaces: info.namespaces, | ||
namespacealiases: info.namespacealiases | ||
}; | ||
return self._siteInfo[domain]; | ||
}); | ||
return self._siteInfo[domain]; | ||
} | ||
@@ -230,3 +235,4 @@ }; | ||
var self = this; | ||
return P.try(function() { | ||
return self._getSiteInfo(domain) | ||
.then(function(siteInfo) { | ||
title = title.replace(/ /g, '_') | ||
@@ -247,50 +253,48 @@ // Strip Unicode bidi override characters. | ||
self._checkEmptyTitle(title); | ||
return self._getSiteInfo(domain) | ||
.then(function(siteInfo) { | ||
var result = self._splitNamespace(title, siteInfo); | ||
if (result.namespace === NS_TALK) { | ||
self._checkTalkNamespace(result.title, siteInfo); | ||
} | ||
var fragmentIndex = result.title.indexOf('#'); | ||
if (fragmentIndex >= 0) { | ||
var fragment = result.title.substr(fragmentIndex); | ||
result.fragment = fragment.substr(1).replace(/_/g, ' '); | ||
result.title = result.title | ||
.substring(result.title, result.title.length - fragment.length) | ||
.replace('/_*$/', ''); | ||
} | ||
self._checkLegalTitleCharacters(result.title, siteInfo); | ||
self._checkRelativeTitle(result.title); | ||
// Magic tilde sequences? Nu-uh! | ||
if (result.title.indexOf('~~~') !== -1) { | ||
throw new utils.TitleError({ | ||
type: 'title-invalid-magic-tilde', | ||
title: title | ||
}); | ||
} | ||
self._checkMaxLength(result.title, result.namespace); | ||
var result = self._splitNamespace(title, siteInfo); | ||
if (result.namespace === NS_TALK) { | ||
self._checkTalkNamespace(result.title, siteInfo); | ||
} | ||
var fragmentIndex = result.title.indexOf('#'); | ||
if (fragmentIndex >= 0) { | ||
var fragment = result.title.substr(fragmentIndex); | ||
result.fragment = fragment.substr(1).replace(/_/g, ' '); | ||
result.title = result.title | ||
.substring(result.title, result.title.length - fragment.length) | ||
.replace('/_*$/', ''); | ||
} | ||
result = self._capitalizeTitle(result, siteInfo); | ||
self._checkLegalTitleCharacters(result.title, siteInfo); | ||
self._checkRelativeTitle(result.title); | ||
// Magic tilde sequences? Nu-uh! | ||
if (result.title.indexOf('~~~') !== -1) { | ||
throw new utils.TitleError({ | ||
type: 'title-invalid-magic-tilde', | ||
title: title | ||
}); | ||
} | ||
self._checkMaxLength(result.title, result.namespace); | ||
if (result.namespace !== NS_MAIN) { | ||
self._checkEmptyTitle(result.title); | ||
} | ||
result = self._capitalizeTitle(result, siteInfo); | ||
if (result.namespace === NS_USER || result.namespace === NS_USER_TALK) { | ||
result.title = sanitizeIP(result.title); | ||
} | ||
if (result.namespace !== NS_MAIN) { | ||
self._checkEmptyTitle(result.title); | ||
} | ||
var normalized = result.title; | ||
if (result.namespace === NS_USER || result.namespace === NS_USER_TALK) { | ||
result.title = sanitizeIP(result.title); | ||
} | ||
if (result.namespace !== NS_MAIN) { | ||
normalized = siteInfo.namespaces[result.namespace]['*'] + ':' + normalized; | ||
} | ||
var normalized = result.title; | ||
if (result.fragment) { | ||
normalized = normalized + '#' + result.fragment; | ||
} | ||
if (result.namespace !== NS_MAIN) { | ||
normalized = siteInfo.namespaces[result.namespace]['*'] + ':' + normalized; | ||
} | ||
return normalized; | ||
}); | ||
if (result.fragment) { | ||
normalized = normalized + '#' + result.fragment; | ||
} | ||
return normalized; | ||
}); | ||
@@ -297,0 +301,0 @@ }; |
{ | ||
"name": "mediawiki-title", | ||
"version": "0.1.2", | ||
"version": "0.2.0", | ||
"description": "Title normalization library for mediawiki", | ||
@@ -26,6 +26,2 @@ "main": "lib/index.js", | ||
}, | ||
"dependencies": { | ||
"bluebird": "^3.1.1", | ||
"preq": "^0.4.8" | ||
}, | ||
"devDependencies": { | ||
@@ -37,4 +33,5 @@ "coveralls": "^2.11.6", | ||
"mocha-jshint": "^2.2.6", | ||
"mocha-lcov-reporter": "^1.0.0" | ||
"mocha-lcov-reporter": "^1.0.0", | ||
"preq": "^0.4.8" | ||
} | ||
} |
@@ -1,2 +0,2 @@ | ||
# mediawiki-title [![npm version](https://badge.fury.io/js/mediawiki-title.svg)](https://badge.fury.io/js/mediawiki-title) [![Build Status](https://travis-ci.org/wikimedia/mediawiki-title.svg?branch=master)](https://travis-ci.org/wikimedia/mediawiki-title) [![Coverage Status](https://coveralls.io/repos/github/wikimedia/analytics-mediawiki-storage/badge.svg?branch=master)](https://coveralls.io/github/wikimedia/analytics-mediawiki-storage?branch=master) [![Dependencies](https://david-dm.org/wikimedia/mediawiki-title.svg?branch=master)](https://david-dm.org/wikimedia/mediawiki-title?branch=master) | ||
# mediawiki-title [![npm version](https://badge.fury.io/js/mediawiki-title.svg)](https://badge.fury.io/js/mediawiki-title) [![Build Status](https://travis-ci.org/wikimedia/mediawiki-title.svg?branch=master)](https://travis-ci.org/wikimedia/mediawiki-title) [![Coverage Status](https://coveralls.io/repos/github/wikimedia/mediawiki-title/badge.svg?branch=master)](https://coveralls.io/github/wikimedia/mediawiki-title?branch=master) [![Dependencies](https://david-dm.org/wikimedia/mediawiki-title.svg?branch=master)](https://david-dm.org/wikimedia/mediawiki-title?branch=master) | ||
@@ -7,4 +7,4 @@ MediaWiki title normalization that conforms to the normalization rules used in [MediaWiki Core](https://www.mediawiki.org/wiki/API:Query#Title_normalization). | ||
<a name="API"></a> | ||
## API | ||
<a name="Normalizer"></a> | ||
## Normalizer | ||
@@ -19,6 +19,7 @@ * [Normalizer](#Normalizer) | ||
| Param | Type | Description | | ||
| --- | --- | --- | | ||
| options | <code>Object</code> | the normalizer options | | ||
| options.apiURI | <code>function</code> | a function that takes a domain string and returns back an API URI that needs to be contacted to get the site information used for normalization. | | ||
| options.getSiteInfo | <code>function</code> | a function that takes a domain string and returns back the information about the site. The info is cached in-memory, so there's no need for external caching. | | ||
@@ -38,2 +39,16 @@ <a name="Normalizer+normalize"></a> | ||
<a name="SiteInfo"></a> | ||
## SiteInfo : <code>Object</code> | ||
Information about a Wikimedia site that is required for correct | ||
title normalization. | ||
**Properties** | ||
| Name | Type | Description | | ||
| --- | --- | --- | | ||
| lang | <code>string</code> | Site language code. | | ||
| legaltitlechars | <code>string</code> | A perl-like regex for characters allowed in the page title. | | ||
| namespaces | <code>Object</code> | Site namespaces info in the same format as returned by PHP api. | | ||
| namespacealiases | <code>Object</code> | Site namespace aliases in the same format as returned by PHP api. | | ||
## Usage | ||
@@ -47,3 +62,16 @@ | ||
var normalizer = new Normalizer({ | ||
apiURI: function(domain) { return 'https://' + domain + '/w/api.php'; } | ||
getSiteInfo: function(domain) { | ||
return { | ||
lang: 'en', | ||
legaltitlechars: " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+", | ||
namespaces: { | ||
"0": { | ||
id: 0, | ||
case: "first-letter", | ||
content: "", | ||
"*": "" | ||
}, | ||
} | ||
}; | ||
} | ||
}); | ||
@@ -50,0 +78,0 @@ |
@@ -6,2 +6,4 @@ 'use strict'; | ||
var utils = require('../lib/utils'); | ||
var preq = require('preq'); | ||
var P = require('bluebird'); | ||
@@ -14,4 +16,20 @@ // Run jshint as part of normal testing | ||
var normalizer = new Normalizer({ | ||
apiURI: function (domain) { | ||
return 'https://' + domain + '/w/api.php'; | ||
getSiteInfo: function (domain) { | ||
return preq.post({ | ||
uri: 'https://' + domain + '/w/api.php', | ||
body: { | ||
action: 'query', | ||
meta: 'siteinfo', | ||
siprop: 'general|namespaces|namespacealiases', | ||
format: 'json' | ||
} | ||
}) | ||
.then(function(res) { | ||
return { | ||
lang: res.body.query.general.lang, | ||
legaltitlechars: res.body.query.general.legaltitlechars, | ||
namespaces: res.body.query.namespaces, | ||
namespacealiases: res.body.query.namespacealiases | ||
} | ||
}); | ||
} | ||
@@ -285,2 +303,29 @@ }); | ||
}); | ||
it('Should fetch domains', function() { | ||
return preq.get({ | ||
uri: 'https://en.wikipedia.org/w/api.php?action=sitematrix&format=json' | ||
}) | ||
.then(function(res) { | ||
return Object.keys(res.body.sitematrix) | ||
.filter(function(idx) { | ||
return idx !== 'count' && idx !== 'specials'; | ||
}) | ||
.map(function (idx) { | ||
return res.body.sitematrix[idx].site[0].url.replace(/^https?:\/\//, ''); | ||
}); | ||
}) | ||
.then(function (domains) { | ||
describe('Various domains', function() { | ||
domains.forEach(function (domain) { | ||
it('Should work for ' + domain, function() { | ||
return normalizer.normalize('1', domain) | ||
.then(function (res) { | ||
assert.deepEqual(res, '1'); | ||
}); | ||
}); | ||
}); | ||
}); | ||
}); | ||
}); | ||
}); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
35260
0
793
89
7
- Removedbluebird@^3.1.1
- Removedpreq@^0.4.8
- Removedajv@6.12.6(transitive)
- Removedasn1@0.2.6(transitive)
- Removedassert-plus@1.0.0(transitive)
- Removedasynckit@0.4.0(transitive)
- Removedaws-sign2@0.7.0(transitive)
- Removedaws4@1.13.2(transitive)
- Removedbcrypt-pbkdf@1.0.2(transitive)
- Removedbluebird@3.7.2(transitive)
- Removedcaseless@0.12.0(transitive)
- Removedcombined-stream@1.0.8(transitive)
- Removedcore-util-is@1.0.2(transitive)
- Removeddashdash@1.14.1(transitive)
- Removeddelayed-stream@1.0.0(transitive)
- Removedecc-jsbn@0.1.2(transitive)
- Removedextend@3.0.2(transitive)
- Removedextsprintf@1.3.0(transitive)
- Removedfast-deep-equal@3.1.3(transitive)
- Removedfast-json-stable-stringify@2.1.0(transitive)
- Removedforever-agent@0.6.1(transitive)
- Removedform-data@2.3.3(transitive)
- Removedgetpass@0.1.7(transitive)
- Removedhar-schema@2.0.0(transitive)
- Removedhar-validator@5.1.5(transitive)
- Removedhttp-signature@1.2.0(transitive)
- Removedis-typedarray@1.0.0(transitive)
- Removedisstream@0.1.2(transitive)
- Removedjsbn@0.1.1(transitive)
- Removedjson-schema@0.4.0(transitive)
- Removedjson-schema-traverse@0.4.1(transitive)
- Removedjson-stringify-safe@5.0.1(transitive)
- Removedjsprim@1.4.2(transitive)
- Removedmime-db@1.52.0(transitive)
- Removedmime-types@2.1.35(transitive)
- Removedoauth-sign@0.9.0(transitive)
- Removedperformance-now@2.1.0(transitive)
- Removedpreq@0.4.12(transitive)
- Removedpsl@1.13.0(transitive)
- Removedpunycode@2.3.1(transitive)
- Removedqs@6.5.3(transitive)
- Removedrequest@2.88.2(transitive)
- Removedsafe-buffer@5.2.1(transitive)
- Removedsafer-buffer@2.1.2(transitive)
- Removedsshpk@1.18.0(transitive)
- Removedtough-cookie@2.5.0(transitive)
- Removedtunnel-agent@0.6.0(transitive)
- Removedtweetnacl@0.14.5(transitive)
- Removeduri-js@4.4.1(transitive)
- Removeduuid@3.4.0(transitive)
- Removedverror@1.10.0(transitive)