mediawiki-title
Advanced tools
Comparing version 0.2.4 to 0.3.0
216
lib/index.js
@@ -6,8 +6,2 @@ "use strict"; | ||
var NS_SPECIAL = '-1'; | ||
var NS_MAIN = '0'; | ||
var NS_TALK = '1'; | ||
var NS_USER = '2'; | ||
var NS_USER_TALK = '3'; | ||
// Polyfill for array.find for node 0.10 support. | ||
@@ -31,26 +25,21 @@ function arrayFind(array, predicate) { | ||
* @property {string} legaltitlechars A perl-like regex for characters | ||
* allowed in the page title. | ||
* allowed in the page title. | ||
* @property {string} case Whether to capitalize the first letter of the title. | ||
* Could be obtained from the `general` section of the `siteInfo` php API response. | ||
* @property {Object} namespaces Site namespaces info in the same format as | ||
* returned by PHP api. | ||
* returned by PHP api. | ||
* @property {Object} namespacealiases Site namespace aliases in the same format | ||
* as returned by PHP api. | ||
* as returned by PHP api. | ||
*/ | ||
var regexCache = {}; | ||
function _createInvalidTitleRegex(legalTitleChars) { | ||
if (regexCache[legalTitleChars]) { | ||
return regexCache[legalTitleChars]; | ||
} | ||
// Any character not allowed is forbidden | ||
regexCache[legalTitleChars] = new RegExp('[^' + | ||
utils.convertByteClassToUnicodeClass(legalTitleChars) + ']' + | ||
// URL percent encoding sequences interfere with the ability | ||
// to round-trip titles -- you can't link to them consistently. | ||
'|%[0-9A-Fa-f]{2}' + | ||
// XML/HTML character references produce similar issues. | ||
'|&[A-Za-z0-9\x80-\xff]+;' + | ||
'|&#[0-9]+;' + | ||
'|&#x[0-9A-Fa-f]+;'); | ||
return regexCache[legalTitleChars]; | ||
/**
 * A wiki namespace: a numeric namespace id bound to the site it belongs to.
 *
 * @param {number} id The namespace identifier. It is coerced with `Number()`,
 * so a numeric string such as `'3'` is stored as the number `3`.
 * @param {SiteInfo} siteInfo The site metadata information.
 * @constructor
 */
function Namespace(id, siteInfo) {
    this._siteInfo = siteInfo;
    // Always keep the id as a number so the `is*()` checks can use `===`.
    var numericId = Number(id);
    this._id = numericId;
}
@@ -66,3 +55,3 @@ | ||
return ns.canonical && canonicalName(ns.canonical) === name | ||
|| ns['*'] && canonicalName(ns['*']) === name; | ||
|| ns['*'] && canonicalName(ns['*']) === name; | ||
}); | ||
@@ -81,4 +70,101 @@ if (!index) { | ||
/**
 * Builds a namespace instance from a namespace name. The name is resolved
 * to a namespace index by `_getNSIndex`.
 *
 * @param {string} text Namespace name text.
 * @param {SiteInfo} siteInfo the site information.
 * @returns {Namespace|undefined} the matching namespace, or undefined when
 * the lookup yields no index.
 */
Namespace.fromText = function(text, siteInfo) {
    var nsIndex = _getNSIndex(text, siteInfo);
    // Compare against undefined explicitly: an index of 0 or '0' is valid.
    return nsIndex === undefined ? undefined : new Namespace(nsIndex, siteInfo);
};
/**
 * Factory for the `Main` namespace (namespace id 0) of a site.
 *
 * @param {SiteInfo} siteInfo the site information.
 * @returns {Namespace}
 */
Namespace.main = function(siteInfo) {
    var MAIN_NAMESPACE_ID = 0;
    return new Namespace(MAIN_NAMESPACE_ID, siteInfo);
};
/**
 * Tells whether this is the `Special` namespace (id -1).
 *
 * @returns {boolean}
 */
Namespace.prototype.isSpecial = function() {
    var SPECIAL_NAMESPACE_ID = -1;
    return this._id === SPECIAL_NAMESPACE_ID;
};
/**
 * Tells whether this is the `Main` namespace (id 0).
 *
 * @returns {boolean}
 */
Namespace.prototype.isMain = function() {
    var MAIN_NAMESPACE_ID = 0;
    return this._id === MAIN_NAMESPACE_ID;
};
/**
 * Tells whether this is the `Talk` namespace (id 1).
 *
 * @returns {boolean}
 */
Namespace.prototype.isTalk = function() {
    var TALK_NAMESPACE_ID = 1;
    return this._id === TALK_NAMESPACE_ID;
};
/**
 * Tells whether this is the `User` namespace (id 2).
 *
 * @returns {boolean}
 */
Namespace.prototype.isUser = function() {
    var USER_NAMESPACE_ID = 2;
    return this._id === USER_NAMESPACE_ID;
};
/**
 * Tells whether this is the `User_Talk` namespace (id 3).
 *
 * @returns {boolean}
 */
Namespace.prototype.isUserTalk = function() {
    var USER_TALK_NAMESPACE_ID = 3;
    return this._id === USER_TALK_NAMESPACE_ID;
};
/**
 * Returns this namespace's name in normalized form: the `*` entry recorded
 * for the namespace id in `siteInfo.namespaces`, with every space replaced
 * by an underscore.
 *
 * @returns {string}
 */
Namespace.prototype.getNormalizedText = function() {
    // siteInfo.namespaces is keyed by the id in string form.
    var nsKey = this._id + '';
    var nsInfo = this._siteInfo.namespaces[nsKey];
    var nsName = nsInfo['*'];
    return nsName.replace(/ /g, '_');
};
// Cache of compiled regexes, keyed by the legalTitleChars string they were
// built from. The object has no prototype, so a key such as 'constructor'
// or 'toString' can never resolve to an inherited Object.prototype member
// and be mistaken for a cached regex.
var regexCache = Object.create(null);

/**
 * Builds the regex that detects content which is not allowed in a title,
 * reusing a previously built regex for the same `legalTitleChars`.
 *
 * The regex matches any single character outside the allowed class, a URL
 * percent-encoding sequence, or an XML/HTML character reference.
 *
 * @param {string} legalTitleChars A perl-like regex character class listing
 * the characters allowed in a page title. It is passed through
 * `utils.convertByteClassToUnicodeClass` before being used.
 * @returns {RegExp} the regex matching forbidden title content.
 */
function _createInvalidTitleRegex(legalTitleChars) {
    var cached = regexCache[legalTitleChars];
    if (cached) {
        return cached;
    }

    // Any character not allowed is forbidden
    var invalidTitleRegex = new RegExp('[^' +
        utils.convertByteClassToUnicodeClass(legalTitleChars) + ']' +
        // URL percent encoding sequences interfere with the ability
        // to round-trip titles -- you can't link to them consistently.
        '|%[0-9A-Fa-f]{2}' +
        // XML/HTML character references produce similar issues.
        '|&[A-Za-z0-9\x80-\xff]+;' +
        '|&#[0-9]+;' +
        '|&#x[0-9A-Fa-f]+;');
    regexCache[legalTitleChars] = invalidTitleRegex;
    return invalidTitleRegex;
}
function _capitalizeTitle(result, siteInfo) { | ||
if (siteInfo.namespaces[result.namespace].case === 'first-letter') { | ||
if (siteInfo.case === 'first-letter') { | ||
if (result.title[0] === 'i' && (siteInfo.lang === 'az' | ||
@@ -102,3 +188,3 @@ || siteInfo.lang === 'tr' | ||
var namespaceText = match[1]; | ||
var ns = _getNSIndex(namespaceText, siteInfo); | ||
var ns = Namespace.fromText(namespaceText, siteInfo); | ||
if (ns !== undefined) { | ||
@@ -113,3 +199,3 @@ return { | ||
title: title, | ||
namespace: NS_MAIN | ||
namespace: Namespace.main(siteInfo) | ||
}; | ||
@@ -161,3 +247,3 @@ } | ||
var split = _splitNamespace(title, siteInfo); | ||
if (split.namespace && split.namespace !== NS_MAIN) { | ||
if (split.namespace && !split.namespace.isMain()) { | ||
throw new utils.TitleError({ | ||
@@ -171,3 +257,3 @@ type: 'title-invalid-talk-namespace', | ||
function _checkMaxLength(title, namespace) { | ||
var maxLength = namespace !== NS_SPECIAL ? 255 : 512; | ||
var maxLength = !namespace.isSpecial() ? 255 : 512; | ||
if (title.length > maxLength) { | ||
@@ -183,2 +269,19 @@ throw new utils.TitleError({ | ||
/**
 * Creates a new title object from the article dbKey and namespace.
 *
 * @param {string} key The article title in a form of the dbKey.
 * @param {Namespace|number} namespace The article namespace: either a
 * Namespace instance, which is stored as-is, or a namespace id, which is
 * wrapped in a new Namespace for `siteInfo`.
 * @param {SiteInfo} siteInfo The site metadata.
 * @param {string} [fragment] The fragment of the title.
 * @throws {TypeError} if `namespace` is null or undefined.
 * @constructor
 */
function Title(key, namespace, siteInfo, fragment) {
    if (namespace === null || namespace === undefined) {
        throw new TypeError('Invalid type of namespace parameter. '
            + 'Must be a Namespace or a namespace id');
    }
    // `instanceof` is the reliable check: it keeps working when the
    // constructor is renamed by a minifier or when a subclass instance is
    // passed. The name comparison is kept as a fallback so that objects
    // which were accepted before (anything whose constructor is named
    // 'Namespace') are still accepted.
    var isNamespace = namespace instanceof Namespace
        || (!!namespace.constructor && namespace.constructor.name === 'Namespace');

    this._key = key;
    this._namespace = isNamespace ? namespace : new Namespace(namespace, siteInfo);
    this._siteInfo = siteInfo;
    this._fragment = fragment;
}
/** | ||
* Normalize a title according to the rules of <domain> | ||
@@ -189,7 +292,5 @@ * | ||
* | ||
* @returns {string} normalized version of a title. | ||
* | ||
* @public | ||
* @returns {Title} The resulting title object. | ||
*/ | ||
function normalize(title, siteInfo) { | ||
Title.newFromText = function(title, siteInfo) { | ||
if (typeof title !== 'string') { | ||
@@ -219,3 +320,3 @@ throw new TypeError('Invalid type of title parameter. Must be a string'); | ||
var result = _splitNamespace(title, siteInfo); | ||
if (result.namespace === NS_TALK) { | ||
if (result.namespace.isTalk()) { | ||
_checkTalkNamespace(result.title, siteInfo); | ||
@@ -245,26 +346,41 @@ } | ||
if (result.namespace !== NS_MAIN) { | ||
if (!result.namespace.isMain()) { | ||
_checkEmptyTitle(result.title); | ||
} | ||
if (result.namespace === NS_USER || result.namespace === NS_USER_TALK) { | ||
if (result.namespace.isUser() || result.namespace.isUserTalk()) { | ||
result.title = sanitizeIP(result.title); | ||
} | ||
var normalized = result.title; | ||
return new Title(result.title, result.namespace, siteInfo); | ||
}; | ||
if (result.namespace !== NS_MAIN) { | ||
normalized = siteInfo.namespaces[result.namespace]['*'].replace(/ /g, '_') | ||
+ ':' + normalized; | ||
/** | ||
* Returns the normalized article title and namespace. | ||
* | ||
* @returns {string} | ||
*/ | ||
Title.prototype.getPrefixedDBKey = function() { | ||
var normalized = this._key; | ||
if (!this._namespace.isMain()) { | ||
normalized = this._namespace.getNormalizedText() + ':' + normalized; | ||
} | ||
if (result.fragment) { | ||
normalized = normalized + '#' + result.fragment; | ||
if (this._fragment) { | ||
normalized = normalized + '#' + this._fragment; | ||
} | ||
return normalized; | ||
} | ||
}; | ||
module.exports = { | ||
normalize: normalize | ||
}; | ||
/**
 * Accessor for the namespace object stored on this title.
 *
 * @returns {Namespace}
 */
Title.prototype.getNamespace = function() {
    var titleNamespace = this._namespace;
    return titleNamespace;
};
module.exports = {}; | ||
module.exports.Namespace = Namespace; | ||
module.exports.Title = Title; |
{ | ||
"name": "mediawiki-title", | ||
"version": "0.2.4", | ||
"version": "0.3.0", | ||
"description": "Title normalization library for mediawiki", | ||
@@ -5,0 +5,0 @@ "main": "lib/index.js", |
142
README.md
@@ -7,8 +7,9 @@ # mediawiki-title [![npm version](https://badge.fury.io/js/mediawiki-title.svg)](https://badge.fury.io/js/mediawiki-title) [![Build Status](https://travis-ci.org/wikimedia/mediawiki-title.svg?branch=master)](https://travis-ci.org/wikimedia/mediawiki-title) [![Coverage Status](https://coveralls.io/repos/github/wikimedia/mediawiki-title/badge.svg?branch=master)](https://coveralls.io/github/wikimedia/mediawiki-title?branch=master) [![Dependencies](https://david-dm.org/wikimedia/mediawiki-title.svg?branch=master)](https://david-dm.org/wikimedia/mediawiki-title?branch=master) | ||
## Functions | ||
## Classes | ||
<dl> | ||
<dt><a href="#normalize">normalize(title, siteInfo)</a> ⇒ <code>string</code></dt> | ||
<dd><p>Normalize a title according to the rules of <domain></p> | ||
</dd> | ||
<dt><a href="#Namespace">Namespace</a></dt> | ||
<dd></dd> | ||
<dt><a href="#Title">Title</a></dt> | ||
<dd></dd> | ||
</dl> | ||
@@ -20,13 +21,127 @@ | ||
<dt><a href="#SiteInfo">SiteInfo</a> : <code>Object</code></dt> | ||
<dd><p>Information about a wikimedia site required to make correct normalization.</p> | ||
<dd><p>Information about a wikimedia site required to make correct | ||
normalization.</p> | ||
</dd> | ||
</dl> | ||
<a name="normalize"></a> | ||
## normalize(title, siteInfo) ⇒ <code>string</code> | ||
<a name="Namespace"></a> | ||
## Namespace | ||
**Kind**: global class | ||
* [Namespace](#Namespace) | ||
* [new Namespace(id, siteInfo)](#new_Namespace_new) | ||
* _instance_ | ||
* [.isSpecial()](#Namespace+isSpecial) ⇒ <code>boolean</code> | ||
* [.isMain()](#Namespace+isMain) ⇒ <code>boolean</code> | ||
* [.isTalk()](#Namespace+isTalk) ⇒ <code>boolean</code> | ||
* [.isUser()](#Namespace+isUser) ⇒ <code>boolean</code> | ||
* [.isUserTalk()](#Namespace+isUserTalk) ⇒ <code>boolean</code> | ||
* [.getNormalizedText()](#Namespace+getNormalizedText) ⇒ <code>string</code> | ||
* _static_ | ||
* [.fromText(text, siteInfo)](#Namespace.fromText) ⇒ <code>[Namespace](#Namespace)</code> | <code>undefined</code> | ||
* [.main(siteInfo)](#Namespace.main) ⇒ <code>[Namespace](#Namespace)</code> | ||
<a name="new_Namespace_new"></a> | ||
### new Namespace(id, siteInfo) | ||
Represents a wiki namespace | ||
| Param | Type | Description | | ||
| --- | --- | --- | | ||
| id | <code>number</code> | The namespace identifier | | ||
| siteInfo | <code>[SiteInfo](#SiteInfo)</code> | The site metadata information. | | ||
<a name="Namespace+isSpecial"></a> | ||
### namespace.isSpecial() ⇒ <code>boolean</code> | ||
Checks whether namespace is `Special`. | ||
**Kind**: instance method of <code>[Namespace](#Namespace)</code> | ||
<a name="Namespace+isMain"></a> | ||
### namespace.isMain() ⇒ <code>boolean</code> | ||
Checks whether namespace is `Main`. | ||
**Kind**: instance method of <code>[Namespace](#Namespace)</code> | ||
<a name="Namespace+isTalk"></a> | ||
### namespace.isTalk() ⇒ <code>boolean</code> | ||
Checks whether namespace is `Talk`. | ||
**Kind**: instance method of <code>[Namespace](#Namespace)</code> | ||
<a name="Namespace+isUser"></a> | ||
### namespace.isUser() ⇒ <code>boolean</code> | ||
Checks whether namespace is `User`. | ||
**Kind**: instance method of <code>[Namespace](#Namespace)</code> | ||
<a name="Namespace+isUserTalk"></a> | ||
### namespace.isUserTalk() ⇒ <code>boolean</code> | ||
Checks whether namespace is `User_Talk`. | ||
**Kind**: instance method of <code>[Namespace](#Namespace)</code> | ||
<a name="Namespace+getNormalizedText"></a> | ||
### namespace.getNormalizedText() ⇒ <code>string</code> | ||
Get the canonical name string for this namespace. | ||
**Kind**: instance method of <code>[Namespace](#Namespace)</code> | ||
<a name="Namespace.fromText"></a> | ||
### Namespace.fromText(text, siteInfo) ⇒ <code>[Namespace](#Namespace)</code> | <code>undefined</code> | ||
Creates a namespace instance from namespace text or a namespace alias | ||
**Kind**: static method of <code>[Namespace](#Namespace)</code> | ||
**Returns**: <code>[Namespace](#Namespace)</code> | <code>undefined</code> - a namespace or undefined if it wasn't found. | ||
| Param | Type | Description | | ||
| --- | --- | --- | | ||
| text | <code>string</code> | Namespace name text. | | ||
| siteInfo | <code>[SiteInfo](#SiteInfo)</code> | the site information. | | ||
<a name="Namespace.main"></a> | ||
### Namespace.main(siteInfo) ⇒ <code>[Namespace](#Namespace)</code> | ||
Creates a namespace object for a `Main` namespace. | ||
**Kind**: static method of <code>[Namespace](#Namespace)</code> | ||
| Param | Type | Description | | ||
| --- | --- | --- | | ||
| siteInfo | <code>[SiteInfo](#SiteInfo)</code> | the site information. | | ||
<a name="Title"></a> | ||
## Title | ||
**Kind**: global class | ||
* [Title](#Title) | ||
* [new Title(key, namespace, siteInfo, [fragment])](#new_Title_new) | ||
* _instance_ | ||
* [.getPrefixedDBKey()](#Title+getPrefixedDBKey) ⇒ <code>string</code> | ||
* [.getNamespace()](#Title+getNamespace) ⇒ <code>[Namespace](#Namespace)</code> | ||
* _static_ | ||
* [.newFromText(title, siteInfo)](#Title.newFromText) ⇒ <code>[Title](#Title)</code> | ||
<a name="new_Title_new"></a> | ||
### new Title(key, namespace, siteInfo, [fragment]) | ||
Creates a new title object from the article dbKey and namespace | ||
| Param | Type | Description | | ||
| --- | --- | --- | | ||
| key | <code>string</code> | The article title in a form of the dbKey. | | ||
| namespace | <code>[Namespace](#Namespace)</code> | <code>number</code> | The article namespace. | | ||
| siteInfo | <code>[SiteInfo](#SiteInfo)</code> | The site metadata. | | ||
| [fragment] | <code>string</code> | The fragment of the title. | | ||
<a name="Title+getPrefixedDBKey"></a> | ||
### title.getPrefixedDBKey() ⇒ <code>string</code> | ||
Returns the normalized article title and namespace. | ||
**Kind**: instance method of <code>[Title](#Title)</code> | ||
<a name="Title+getNamespace"></a> | ||
### title.getNamespace() ⇒ <code>[Namespace](#Namespace)</code> | ||
Returns the namespace of an article. | ||
**Kind**: instance method of <code>[Title](#Title)</code> | ||
<a name="Title.newFromText"></a> | ||
### Title.newFromText(title, siteInfo) ⇒ <code>[Title](#Title)</code> | ||
Normalize a title according to the rules of <domain> | ||
**Returns**: <code>string</code> - normalized version of a title. | ||
**Access:** public | ||
**Kind**: static method of <code>[Title](#Title)</code> | ||
**Returns**: <code>[Title](#Title)</code> - The resulting title object. | ||
| Param | Type | Description | | ||
@@ -41,3 +156,4 @@ | --- | --- | --- | | ||
normalization. | ||
**Kind**: global typedef | ||
**Properties** | ||
@@ -49,2 +165,3 @@ | ||
| legaltitlechars | <code>string</code> | A perl-like regex for characters allowed in the page title. | | ||
| case | <code>string</code> | Whether to capitalize the first letter of the title. Could be obtained from the `general` section of the `siteInfo` php API response. | | ||
| namespaces | <code>Object</code> | Site namespaces info in the same format as returned by PHP api. | | ||
@@ -60,3 +177,3 @@ | namespacealiases | <code>Object</code> | Site namespace aliases in the same format as returned by PHP api. | | ||
```javascript | ||
var result = normalizer.normalize('some_title', { | ||
var result = Title.newFromText('some_title', { | ||
lang: 'en', | ||
@@ -73,2 +190,3 @@ legaltitlechars: " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+", | ||
}); | ||
console.log(result.getPrefixedDBKey()); | ||
``` | ||
@@ -75,0 +193,0 @@ |
'use strict'; | ||
var assert = require('assert'); | ||
var normalize = require('../lib/index').normalize; | ||
var Title = require('../lib/index').Title; | ||
var utils = require('../lib/utils'); | ||
@@ -32,2 +32,3 @@ var preq = require('preq'); | ||
legaltitlechars: res.body.query.general.legaltitlechars, | ||
case: res.body.query.general.case, | ||
namespaces: res.body.query.namespaces, | ||
@@ -96,3 +97,3 @@ namespacealiases: res.body.query.namespacealiases | ||
.then(function(siteInfo) { | ||
return normalize(testCase[0], siteInfo); | ||
return Title.newFromText(testCase[0], siteInfo); | ||
}) | ||
@@ -144,3 +145,3 @@ .then(function () { | ||
.then(function(siteInfo) { | ||
return normalize(title[0], siteInfo); | ||
return Title.newFromText(title[0], siteInfo); | ||
}) | ||
@@ -191,3 +192,2 @@ }); | ||
// Case-sensitive namespace | ||
[ 'en.wikipedia.org', 'Gadget definition:test', 'Gadget_definition:test'], | ||
[ 'en.wikipedia.org', 'user:pchelolo', 'User:Pchelolo'], | ||
@@ -201,3 +201,3 @@ | ||
.then(function(siteInfo) { | ||
return normalize(test[1], siteInfo); | ||
return Title.newFromText(test[1], siteInfo).getPrefixedDBKey(); | ||
}) | ||
@@ -298,6 +298,6 @@ .then(function(res) { | ||
.then(function(siteInfo) { | ||
return normalize('1', siteInfo); | ||
return Title.newFromText('1', siteInfo); | ||
}) | ||
.then(function (res) { | ||
assert.deepEqual(res, '1'); | ||
assert.deepEqual(res.getPrefixedDBKey(), '1'); | ||
}); | ||
@@ -304,0 +304,0 @@ }); |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
39398
840
193