Comparing version 0.3.0 to 0.4.0
{ | ||
"name": "mwn", | ||
"version": "0.3.0", | ||
"version": "0.4.0", | ||
"description": "MediaWiki bot framework for NodeJS", | ||
@@ -18,2 +18,6 @@ "main": "./src/bot.js", | ||
"license": "GPL-3.0-or-later", | ||
"bugs": { | ||
"url": "https://github.com/siddharthvp/mwn/issues" | ||
}, | ||
"homepage": "https://github.com/siddharthvp/mwn#readme", | ||
"files": [ | ||
@@ -20,0 +24,0 @@ "src/" |
@@ -5,3 +5,3 @@ # mwn | ||
Development status: **Unstable**. mwn currently does not follow semantic versioning rules. For now, changes may be made to the public interface without a change in version number. | ||
Development status: **Unstable**. Versioning: while mwn is in version 0, changes may be made to the public interface with a change in the minor version number. | ||
@@ -14,3 +14,7 @@ Documentation given below is incomplete. There are a number of additional classes such as `bot.title`, `bot.wikitext`, `bot.page`, etc that provide useful functionality but aren't documented. | ||
Until mwn is released on npm, enter the `node_modules` directory of your project and run: | ||
To install, run `npm install mwn`. | ||
[![Download stats](https://nodei.co/npm/mwn.png?downloads=true&downloadRank=true)](https://nodei.co/npm/mwn/) | ||
Or obtain the latest development copy: | ||
```sh | ||
@@ -17,0 +21,0 @@ git clone https://github.com/siddharthvp/mwn.git |
module.exports = function(bot) { | ||
/** | ||
* Class for some basic wikitext parsing, involving | ||
* links, files, categories and templates. | ||
* | ||
* For more advanced and sophisticated wikitext parsing, you can use | ||
* mwparserfromhell <https://github.com/earwig/mwparserfromhell> | ||
* implemented in python (which you can use within node.js using | ||
* the child_process interface). However, mwparserfromhell doesn't | ||
* recognize localised namespaces and wiki-specific configs. | ||
*/ | ||
class Wikitext { | ||
/** | ||
* @constructor | ||
* @param {string} wikitext | ||
*/ | ||
/** @param {string} wikitext */ | ||
constructor(wikitext) { | ||
@@ -20,10 +27,13 @@ this.text = wikitext; | ||
var n = this.text.length; | ||
var startIdx, endIdx; | ||
// files can have links in captions; use a stack to handle the nesting | ||
var stack = new Stack(); | ||
for (let i=0; i<n; i++) { | ||
if (this.text[i] === '[' && this.text[i+1] === '[') { | ||
startIdx = i + 2; | ||
} else if (this.text[i] === ']' && this.text[i+1] === ']') { | ||
endIdx = i; | ||
processLink(this, this.text.slice(startIdx, endIdx)); | ||
startIdx = null; | ||
stack.push({startIdx: i }); | ||
i++; | ||
} else if (this.text[i] === ']' && this.text[i+1] === ']' && stack.top()) { | ||
stack.top().endIdx = i + 1; | ||
processLink(this, stack.top().startIdx, stack.top().endIdx); | ||
stack.pop(); | ||
i++; // necessary to handle cases like [[File:ImageName|thumb|A [[hill]]]] | ||
} | ||
@@ -47,11 +57,36 @@ } | ||
} | ||
* @param {Boolean} recursive Set to `true` to also parse templates that occur | ||
* within other templates, rather than just top-level templates. | ||
* @param {number} [count] - maximum number of templates to parse (default infinite) | ||
* @return Template[] | ||
*/ | ||
parseTemplates(recursive) { | ||
return this.templates = parseTemplates(this.text, recursive); | ||
parseTemplates(count) { | ||
return this.templates = parseTemplates(this.text, false, count); | ||
} | ||
/** | ||
* Also parse templates that occur within other templates, rather than just top-level templates. | ||
* @param {number} [depth=true] - specify a number to limit recursive parsing to a the given recursion | ||
* depth. For infinite depth, specify `true` (default). Eg. with recursive=1, all templates and | ||
* sub-templates will be parsed, but not the templates within the sub-templates | ||
*/ | ||
parseTemplatesRecursive(depth) { | ||
return this.templates = parseTemplates(this.text, depth || true); | ||
} | ||
/** | ||
* Remove a template, link, file or category from the text | ||
* CAUTION: If an entity with the very same wikitext exists earlier in the text, | ||
* that one will be removed instead. | ||
* @param {Object|Template} entity - anything with a wikitext attribute | ||
* and end index | ||
*/ | ||
removeEntity(entity) { | ||
this.text = this.text.replace(entity.wikitext, ''); | ||
} | ||
/** Get the updated text @returns {string} */ | ||
getText() { | ||
return this.text; | ||
} | ||
/** | ||
* Parse the text using the API. | ||
@@ -103,4 +138,11 @@ * @see https://www.mediawiki.org/wiki/API:Parsing_wikitext | ||
var processLink = function(self, linktext) { | ||
var [target, displaytext] = linktext.split('|'); | ||
/**
 * Array subclass used as a stack: push()/pop() come from Array,
 * top() peeks at the most recently pushed element.
 */
class Stack extends Array {
    /** @returns the last element, or undefined when the stack is empty */
    top() {
        const lastIndex = this.length - 1;
        return this[lastIndex];
    }
}
var processLink = function(self, startIdx, endIdx) { | ||
var linktext = self.text.slice(startIdx, endIdx + 1); | ||
var [target, displaytext] = linktext.slice(2, -2).split('|'); | ||
var noSortkey = false; | ||
@@ -115,18 +157,25 @@ if (!displaytext) { | ||
} | ||
var linkobj = { | ||
wikitext: linktext, | ||
dsr: [startIdx, endIdx] // Note: data source ranges (dsr) are invalidated by any removeEntity() operation | ||
}; | ||
if (target[0] !== ':') { | ||
if (title.namespace === 6) { | ||
self.files.push({ | ||
self.files.push(Object.assign({ | ||
target: title, | ||
props: linktext.slice(linktext.indexOf('|') + 1) | ||
}); | ||
props: linktext.slice(linktext.indexOf('|') + 1, -2) | ||
}, linkobj)); | ||
return; | ||
} else if (title.namespace === 14) { | ||
self.categories.push({ | ||
self.categories.push(Object.assign({ | ||
target: title, | ||
sortkey: noSortkey ? '' : displaytext | ||
}); | ||
}, linkobj)); | ||
return; | ||
} | ||
} | ||
self.links.push({target: title, displaytext: displaytext}); | ||
self.links.push(Object.assign({ | ||
target: title, | ||
displaytext: displaytext | ||
}, linkobj)); | ||
}; | ||
@@ -154,4 +203,6 @@ | ||
*/ | ||
constructor(wikitext) { | ||
constructor(wikitext, dsr) { | ||
this.wikitext = wikitext; | ||
// dsr stands for data source range, gives the starting and ending index in wikitext | ||
this.dsr = dsr, // an array of two numbers | ||
this.parameters = []; | ||
@@ -177,3 +228,3 @@ } | ||
this.name = name.trim(); | ||
// this.nameTitle = bot.title.newFromText(name, 10); | ||
this.nameTitle = bot.title.newFromText(name, 10); | ||
} | ||
@@ -185,59 +236,15 @@ } | ||
// https://en.wikipedia.org/wiki/User:SD0001/parseAllTemplates.js (cc-by-sa-3.0/GFDL) | ||
var parseTemplates = function (wikitext, recursive) { | ||
/** | ||
* | ||
* @param {string} wikitext | ||
* @param {boolean|number} [recursive=false] - also parse templates within templates, | ||
* give a number to specify recursion depth. If given as `true`, infinite recursion | ||
* depth is assumed. | ||
* @param {number} [count] - stop parsing when this many templates have been found. | ||
* Recursive parsing does NOT work if count is specified. | ||
*/ | ||
var parseTemplates = function (wikitext, recursive, count) { | ||
var strReplaceAt = function (string, index, char) { | ||
return string.slice(0, index) + char + string.slice(index + 1); | ||
}; | ||
var result = []; | ||
var processTemplateText = function (startIdx, endIdx) { | ||
var text = wikitext.slice(startIdx, endIdx); | ||
var template = new Template('{{' + text.replace(/\1/g, '|') + '}}'); | ||
// swap out pipe in links with \1 control character | ||
// [[File: ]] can have multiple pipes, so might need multiple passes | ||
while (/(\[\[[^\]]*?)\|(.*?\]\])/g.test(text)) { | ||
text = text.replace(/(\[\[[^\]]*?)\|(.*?\]\])/g, '$1\1$2'); | ||
} | ||
var chunks = text.split('|').map(function (chunk) { | ||
// change '\1' control characters back to pipes | ||
return chunk.replace(/\1/g, '|'); | ||
}); | ||
template.setName(chunks[0]); | ||
var parameterChunks = chunks.slice(1); | ||
var unnamedIdx = 1; | ||
parameterChunks.forEach(function (chunk) { | ||
var indexOfEqualTo = chunk.indexOf('='); | ||
var indexOfOpenBraces = chunk.indexOf('{{'); | ||
var isWithoutEquals = !chunk.includes('='); | ||
var hasBracesBeforeEquals = chunk.includes('{{') && indexOfOpenBraces < indexOfEqualTo; | ||
var isUnnamedParam = (isWithoutEquals || hasBracesBeforeEquals); | ||
var pName, pNum, pVal; | ||
if (isUnnamedParam) { | ||
// Get the next number not already used by either an unnamed parameter, | ||
// or by a named parameter like `|1=val` | ||
while (template.getParam(unnamedIdx)) { | ||
unnamedIdx++; | ||
} | ||
pNum = unnamedIdx; | ||
pVal = chunk.trim(); | ||
} else { | ||
pName = chunk.slice(0, indexOfEqualTo).trim(); | ||
pVal = chunk.slice(indexOfEqualTo + 1).trim(); | ||
} | ||
template.addParam(pName || pNum, pVal, chunk); | ||
}); | ||
result.push(template); | ||
}; | ||
var n = wikitext.length; | ||
@@ -271,3 +278,7 @@ | ||
endIdx = i; | ||
processTemplateText(startIdx, endIdx); | ||
var templateWikitext = wikitext.slice(startIdx, endIdx); // without braces | ||
result.push(processTemplateText(templateWikitext, [startIdx - 2, endIdx + 1])); | ||
if (count && result.length === count) { | ||
return result; | ||
} | ||
} | ||
@@ -305,3 +316,3 @@ numUnclosed -= 2; | ||
if (recursive) { | ||
if (recursive && !count) { | ||
var subtemplates = result.map(function (template) { | ||
@@ -312,3 +323,3 @@ return template.wikitext.slice(2, -2); | ||
}).map(function (templateWikitext) { | ||
return parseTemplates(templateWikitext, true); | ||
return parseTemplates(templateWikitext, recursive === true ? true : recursive - 1); | ||
}); | ||
@@ -323,2 +334,59 @@ | ||
/**
 * Build a Template object from the inner text of a single template.
 *
 * The text is split on pipes into a name chunk followed by parameter chunks.
 * Pipes that belong to nested templates arrive already masked as the \x01
 * control character; pipes inside [[links]] are masked here before splitting.
 * All masked pipes are turned back into real pipes in the resulting chunks.
 *
 * The placeholder is written as \x01 rather than the legacy octal escape \1:
 * octal escapes in string literals are a SyntaxError in strict mode and in
 * ES modules, and \1 inside a regex is only treated as a character (instead
 * of a backreference) through legacy Annex B behaviour.
 *
 * @param {string} text - template wikitext without braces, with the pipes in
 * nested templates replaced by \x01
 * @param {Number[]} [dsr] - data source range (optional) for the template object
 * Array of starting and ending indices of template in wikitext
 * @returns {Template} the template with its name and parameters populated
 */
var processTemplateText = function (text, dsr) {
    // change \x01 control characters back to pipes
    var restorePipes = function (str) {
        return str.replace(/\x01/g, '|');
    };

    var template = new Template('{{' + restorePipes(text) + '}}', dsr);

    // swap out pipe in links with \x01 control character
    // [[File: ]] can have multiple pipes, and each pass only masks the first
    // remaining pipe of a link, so repeat until no link contains a pipe
    while (/(\[\[[^\]]*?)\|(.*?\]\])/.test(text)) {
        text = text.replace(/(\[\[[^\]]*?)\|(.*?\]\])/g, '$1\x01$2');
    }

    var chunks = text.split('|').map(restorePipes);

    template.setName(chunks[0]);

    var parameterChunks = chunks.slice(1);
    var unnamedIdx = 1;
    parameterChunks.forEach(function (chunk) {
        var indexOfEqualTo = chunk.indexOf('=');
        var indexOfOpenBraces = chunk.indexOf('{{');

        // A chunk is an unnamed (positional) parameter if it has no "=" at all,
        // or if its first "=" comes after the start of a nested template
        var isWithoutEquals = indexOfEqualTo === -1;
        var hasBracesBeforeEquals = indexOfOpenBraces !== -1 && indexOfOpenBraces < indexOfEqualTo;
        var isUnnamedParam = (isWithoutEquals || hasBracesBeforeEquals);

        var pName, pNum, pVal;
        if (isUnnamedParam) {
            // Get the next number not already used by either an unnamed parameter,
            // or by a named parameter like `|1=val`
            while (template.getParam(unnamedIdx)) {
                unnamedIdx++;
            }
            pNum = unnamedIdx;
            pVal = chunk.trim();
        } else {
            pName = chunk.slice(0, indexOfEqualTo).trim();
            pVal = chunk.slice(indexOfEqualTo + 1).trim();
        }
        template.addParam(pName || pNum, pVal, chunk);
    });

    return template;
};
/**
 * Return a copy of `string` in which the single character at position `index`
 * is dropped and `char` is inserted in its place.
 * @param {string} string
 * @param {number} index
 * @param {string} char
 * @returns {string}
 */
var strReplaceAt = function (string, index, char) {
    var head = string.slice(0, index);
    var tail = string.slice(index + 1);
    return head + char + tail;
};
return Wikitext; | ||
@@ -325,0 +393,0 @@ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No bug tracker
Maintenance: Package does not have a linked bug tracker in package.json.
Found 1 instance in 1 package
No website
Quality: Package does not have a website.
Found 1 instance in 1 package
134035
3328
0
1
274