Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

mediawiki-title

Package Overview
Dependencies
Maintainers
4
Versions
32
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

mediawiki-title - npm Package Compare versions

Comparing version 0.1.2 to 0.2.0

134

lib/index.js
"use strict";
var P = require('bluebird');
var preq = require('preq');
var sanitizeIP = require('./ip');

@@ -25,9 +23,24 @@ var utils = require('./utils');

/**
* Information about a Wikimedia site that is required for correct
* normalization.
*
* @typedef SiteInfo
* @type Object
* @property {string} lang Site language code.
* @property {string} legaltitlechars A Perl-like regex for characters
* allowed in the page title.
* @property {Object} namespaces Site namespaces info in the same format as
* returned by the PHP API.
* @property {Object} namespacealiases Site namespace aliases in the same format
* as returned by the PHP API.
*/
/**
* Creates an instance of title normalizer.
*
* @param {Object} options the normalizer options
* @param {Function(string):string} options.apiURI a function
* @param {Function(string):Promise(SiteInfo)} options.getSiteInfo a function
* that takes a domain string and returns back
* an API URI that needs to be contacted to get
* the site information used for normalization.
* the information about the site. The info is cached
* in-memory, so there's no need for external caching.
* @constructor

@@ -37,3 +50,3 @@ * @public

function Normalizer(options) {
if (!options || typeof options.apiURI !== 'function') {
if (!options || typeof options.getSiteInfo !== 'function') {
throw new TypeError('Invalid options for Normalizer constructor');

@@ -67,23 +80,15 @@ }

if (self._siteInfo[domain]) {
return P.resolve(self._siteInfo[domain]);
return self._siteInfo[domain];
} else {
return preq.post({
uri: self._options.apiURI(domain),
body: {
action: 'query',
meta: 'siteinfo',
siprop: 'general|namespaces|namespacealiases',
format: 'json'
}
})
.then(function(res) {
res = res.body.query;
self._siteInfo[domain] = {
lang: res.general.lang,
invalidTitleRegex: self._createInvalidTitleRegex(res.general.legaltitlechars),
namespaces: res.namespaces,
namespacealiases: res.namespacealiases
self._siteInfo[domain] = self._options.getSiteInfo(domain)
.then(function(info) {
return {
lang: info.lang,
invalidTitleRegex: self._createInvalidTitleRegex(info.legaltitlechars),
namespaces: info.namespaces,
namespacealiases: info.namespacealiases
};
return self._siteInfo[domain];
});
return self._siteInfo[domain];
}

@@ -230,3 +235,4 @@ };

var self = this;
return P.try(function() {
return self._getSiteInfo(domain)
.then(function(siteInfo) {
title = title.replace(/ /g, '_')

@@ -247,50 +253,48 @@ // Strip Unicode bidi override characters.

self._checkEmptyTitle(title);
return self._getSiteInfo(domain)
.then(function(siteInfo) {
var result = self._splitNamespace(title, siteInfo);
if (result.namespace === NS_TALK) {
self._checkTalkNamespace(result.title, siteInfo);
}
var fragmentIndex = result.title.indexOf('#');
if (fragmentIndex >= 0) {
var fragment = result.title.substr(fragmentIndex);
result.fragment = fragment.substr(1).replace(/_/g, ' ');
result.title = result.title
.substring(result.title, result.title.length - fragment.length)
.replace('/_*$/', '');
}
self._checkLegalTitleCharacters(result.title, siteInfo);
self._checkRelativeTitle(result.title);
// Magic tilde sequences? Nu-uh!
if (result.title.indexOf('~~~') !== -1) {
throw new utils.TitleError({
type: 'title-invalid-magic-tilde',
title: title
});
}
self._checkMaxLength(result.title, result.namespace);
var result = self._splitNamespace(title, siteInfo);
if (result.namespace === NS_TALK) {
self._checkTalkNamespace(result.title, siteInfo);
}
var fragmentIndex = result.title.indexOf('#');
if (fragmentIndex >= 0) {
var fragment = result.title.substr(fragmentIndex);
result.fragment = fragment.substr(1).replace(/_/g, ' ');
result.title = result.title
.substring(result.title, result.title.length - fragment.length)
.replace('/_*$/', '');
}
result = self._capitalizeTitle(result, siteInfo);
self._checkLegalTitleCharacters(result.title, siteInfo);
self._checkRelativeTitle(result.title);
// Magic tilde sequences? Nu-uh!
if (result.title.indexOf('~~~') !== -1) {
throw new utils.TitleError({
type: 'title-invalid-magic-tilde',
title: title
});
}
self._checkMaxLength(result.title, result.namespace);
if (result.namespace !== NS_MAIN) {
self._checkEmptyTitle(result.title);
}
result = self._capitalizeTitle(result, siteInfo);
if (result.namespace === NS_USER || result.namespace === NS_USER_TALK) {
result.title = sanitizeIP(result.title);
}
if (result.namespace !== NS_MAIN) {
self._checkEmptyTitle(result.title);
}
var normalized = result.title;
if (result.namespace === NS_USER || result.namespace === NS_USER_TALK) {
result.title = sanitizeIP(result.title);
}
if (result.namespace !== NS_MAIN) {
normalized = siteInfo.namespaces[result.namespace]['*'] + ':' + normalized;
}
var normalized = result.title;
if (result.fragment) {
normalized = normalized + '#' + result.fragment;
}
if (result.namespace !== NS_MAIN) {
normalized = siteInfo.namespaces[result.namespace]['*'] + ':' + normalized;
}
return normalized;
});
if (result.fragment) {
normalized = normalized + '#' + result.fragment;
}
return normalized;
});

@@ -297,0 +301,0 @@ };

{
"name": "mediawiki-title",
"version": "0.1.2",
"version": "0.2.0",
"description": "Title normalization library for mediawiki",

@@ -26,6 +26,2 @@ "main": "lib/index.js",

},
"dependencies": {
"bluebird": "^3.1.1",
"preq": "^0.4.8"
},
"devDependencies": {

@@ -37,4 +33,5 @@ "coveralls": "^2.11.6",

"mocha-jshint": "^2.2.6",
"mocha-lcov-reporter": "^1.0.0"
"mocha-lcov-reporter": "^1.0.0",
"preq": "^0.4.8"
}
}

@@ -1,2 +0,2 @@

# mediawiki-title [![npm version](https://badge.fury.io/js/mediawiki-title.svg)](https://badge.fury.io/js/mediawiki-title) [![Build Status](https://travis-ci.org/wikimedia/mediawiki-title.svg?branch=master)](https://travis-ci.org/wikimedia/mediawiki-title) [![Coverage Status](https://coveralls.io/repos/github/wikimedia/analytics-mediawiki-storage/badge.svg?branch=master)](https://coveralls.io/github/wikimedia/analytics-mediawiki-storage?branch=master) [![Dependencies](https://david-dm.org/wikimedia/mediawiki-title.svg?branch=master)](https://david-dm.org/wikimedia/mediawiki-title?branch=master)
# mediawiki-title [![npm version](https://badge.fury.io/js/mediawiki-title.svg)](https://badge.fury.io/js/mediawiki-title) [![Build Status](https://travis-ci.org/wikimedia/mediawiki-title.svg?branch=master)](https://travis-ci.org/wikimedia/mediawiki-title) [![Coverage Status](https://coveralls.io/repos/github/wikimedia/mediawiki-title/badge.svg?branch=master)](https://coveralls.io/github/wikimedia/mediawiki-title?branch=master) [![Dependencies](https://david-dm.org/wikimedia/mediawiki-title.svg?branch=master)](https://david-dm.org/wikimedia/mediawiki-title?branch=master)

@@ -7,4 +7,4 @@ MediaWiki title normalization that conforms to the normalization rules used in [MediaWiki Core](https://www.mediawiki.org/wiki/API:Query#Title_normalization).

<a name="API"></a>
## API
<a name="Normalizer"></a>
## Normalizer

@@ -19,6 +19,7 @@ * [Normalizer](#Normalizer)

| Param | Type | Description |
| --- | --- | --- |
| options | <code>Object</code> | the normalizer options |
| options.apiURI | <code>function</code> | a function that takes a domain string and returns back an API URI that needs to be contacted to get the site information used for normalization. |
| options.getSiteInfo | <code>function</code> | a function that takes a domain string and returns back the information about the site. The info is cached in-memory, so there's no need for external caching. |

@@ -38,2 +39,16 @@ <a name="Normalizer+normalize"></a>

<a name="SiteInfo"></a>
## SiteInfo : <code>Object</code>
Information about a Wikimedia site that is required for correct
normalization.
**Properties**
| Name | Type | Description |
| --- | --- | --- |
| lang | <code>string</code> | Site language code. |
| legaltitlechars | <code>string</code> | A Perl-like regex for characters allowed in the page title. |
| namespaces | <code>Object</code> | Site namespaces info in the same format as returned by the PHP API. |
| namespacealiases | <code>Object</code> | Site namespace aliases in the same format as returned by the PHP API. |
## Usage

@@ -47,3 +62,16 @@

var normalizer = new Normalizer({
apiURI: function(domain) { return 'https://' + domain + '/w/api.php'; }
getSiteInfo: function(domain) {
return {
lang: 'en',
legaltitlechars: " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+",
namespaces: {
"0": {
id: 0,
case: "first-letter",
content: "",
"*": ""
},
}
};
}
});

@@ -50,0 +78,0 @@

@@ -6,2 +6,4 @@ 'use strict';

var utils = require('../lib/utils');
var preq = require('preq');
var P = require('bluebird');

@@ -14,4 +16,20 @@ // Run jshint as part of normal testing

var normalizer = new Normalizer({
apiURI: function (domain) {
return 'https://' + domain + '/w/api.php';
getSiteInfo: function (domain) {
return preq.post({
uri: 'https://' + domain + '/w/api.php',
body: {
action: 'query',
meta: 'siteinfo',
siprop: 'general|namespaces|namespacealiases',
format: 'json'
}
})
.then(function(res) {
return {
lang: res.body.query.general.lang,
legaltitlechars: res.body.query.general.legaltitlechars,
namespaces: res.body.query.namespaces,
namespacealiases: res.body.query.namespacealiases
}
});
}

@@ -285,2 +303,29 @@ });

});
// Downloads the sitematrix from en.wikipedia.org and, for every listed entry,
// registers a test asserting that the title '1' normalizes to '1' on the
// first site of that entry.
it('Should fetch domains', function() {
    /**
     * Collects the host name of the first site of every sitematrix entry.
     *
     * @param {Object} response the HTTP response of the sitematrix request
     * @return {string[]} site URLs with the leading http(s):// removed
     */
    function extractDomains(response) {
        var matrix = response.body.sitematrix;
        var hosts = [];
        Object.keys(matrix).forEach(function(key) {
            // The 'count' and 'specials' keys are skipped; every other key
            // is read as an entry with a `site` array.
            if (key === 'count' || key === 'specials') {
                return;
            }
            hosts.push(matrix[key].site[0].url.replace(/^https?:\/\//, ''));
        });
        return hosts;
    }

    /**
     * Declares one test case per host inside a 'Various domains' suite.
     *
     * NOTE(review): this runs only after the request has resolved, i.e. while
     * the enclosing test is already executing - confirm that the test runner
     * actually picks up suites registered this late.
     *
     * @param {string[]} hosts the domains to check
     */
    function registerDomainTests(hosts) {
        describe('Various domains', function() {
            hosts.forEach(function(host) {
                it('Should work for ' + host, function() {
                    return normalizer.normalize('1', host)
                    .then(function(normalized) {
                        assert.deepEqual(normalized, '1');
                    });
                });
            });
        });
    }

    return preq.get({
        uri: 'https://en.wikipedia.org/w/api.php?action=sitematrix&format=json'
    })
    .then(extractDomains)
    .then(registerDomainTests);
});
});
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc