Comparing version 0.4.0 to 0.6.0
{ | ||
"name": "mwn", | ||
"version": "0.4.0", | ||
"version": "0.6.0", | ||
"description": "MediaWiki bot framework for NodeJS", | ||
"main": "./src/bot.js", | ||
"scripts": { | ||
"test": "mocha tests/" | ||
"test": "mocha tests/", | ||
"bump": "node bump-version.js" | ||
}, | ||
@@ -28,3 +29,4 @@ "repository": { | ||
"axios-cookiejar-support": "^1.0.0", | ||
"is-promise": "^4.0.0", | ||
"form-data": "^3.0.0", | ||
"oauth-1.0a": "^2.2.6", | ||
"semlog": "^0.6.10", | ||
@@ -43,2 +45,2 @@ "tough-cookie": "^4.0.0" | ||
} | ||
} | ||
} |
852
src/bot.js
/** | ||
* | ||
* mwn: a MediaWiki bot framework for NodeJS | ||
* mwn: a MediaWiki bot framework for Node.js | ||
* | ||
@@ -39,6 +39,10 @@ * Copyright (C) 2020 Siddharth VP | ||
axiosCookieJarSupport(axios); | ||
const formData = require('form-data'); | ||
const OAuth = require('oauth-1.0a'); | ||
const http = require('http'); | ||
const https = require('https'); | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
const ispromise = require('is-promise'); | ||
const crypto = require('crypto'); | ||
const semlog = require('semlog'); | ||
@@ -53,5 +57,5 @@ const log = semlog.log; | ||
const File = require('./file'); | ||
const Util = require('./util'); | ||
const static_utils = require('./static_utils'); | ||
class Bot { | ||
class mwn { | ||
@@ -86,3 +90,3 @@ | ||
/** | ||
* Bot instances edit token. Initially set as an invalid token string | ||
* Bot instance's edit token. Initially set as an invalid token string | ||
* so that the badtoken handling logic is invoked if the token is | ||
@@ -96,14 +100,2 @@ * not set before a query is sent. | ||
/** | ||
* Internal statistics | ||
* | ||
* @type {object} | ||
*/ | ||
this.counter = { | ||
total: 0, | ||
resolved: 0, | ||
fulfilled: 0, | ||
rejected: 0 | ||
}; | ||
/** | ||
* Default options. | ||
@@ -121,2 +113,5 @@ * Should be immutable | ||
// User agent string | ||
userAgent: 'mwn', | ||
// bot login username and password, setup using Special:BotPasswords | ||
@@ -126,10 +121,47 @@ username: null, | ||
// OAuth credentials | ||
OAuthCredentials: { | ||
consumerToken: null, | ||
consumerSecret: null, | ||
accessToken: null, | ||
accessSecret: null | ||
}, | ||
// does your account have apihighlimits right? Yes for bots and sysops | ||
hasApiHighLimit: true, | ||
// milliseconds to pause before retrying after a maxlag error | ||
maxlagPause: 5000, | ||
// max number of times to retry the same request on errors due to | ||
// maxlag, wiki being in readonly mode, and other transient errors | ||
maxRetries: 3, | ||
// max number of times to retry the same request on maxlag error. | ||
maxlagMaxRetries: 3 | ||
// milliseconds to pause before retrying after a transient error | ||
retryPause: 5000, | ||
// for emergency-shutoff compliance: shutdown bot if the text of the | ||
// shutoffPage doesn't meet the shutoffRegex | ||
// XXX: not implemented | ||
// shutoffPage: null, | ||
// shutoffRegex: /^\s*$/, | ||
// default parameters included in every API request | ||
defaultParams: { | ||
format: 'json', | ||
formatversion: '2', | ||
maxlag: 5 | ||
}, | ||
// options for the edit() function | ||
editConfig: { | ||
// max number of retries on edit conflicts | ||
conflictRetries: 2, | ||
// suppress warning on an edit resulting in no change to the page | ||
suppressNochangeWarning: false, | ||
// abort edit if exclusionRegex matches on the page content | ||
exclusionRegex: null | ||
}, | ||
// options for logging, see semlog documentation | ||
semlog: { | ||
printDateTime: true | ||
} | ||
}; | ||
@@ -143,3 +175,11 @@ | ||
*/ | ||
this.options = merge(this.defaultOptions, customOptions); | ||
if (typeof customOptions === 'string') { | ||
// Read options from file (JSON): | ||
try { | ||
customOptions = JSON.parse(fs.readFileSync(customOptions).toString()); | ||
} catch (err) { | ||
throw new Error(`Failed to read or parse JSON config file: ` + err); | ||
} | ||
} | ||
this.options = mergeDeep1(this.defaultOptions, customOptions); | ||
@@ -159,26 +199,5 @@ /** | ||
*/ | ||
this.requestOptions = { | ||
method: 'post', | ||
headers: { | ||
'User-Agent': 'mwn' | ||
}, | ||
params: { | ||
}, | ||
data: { | ||
format: 'json', | ||
formatversion: '2', | ||
maxlag: 5 | ||
}, | ||
transformRequest: [ | ||
function(data) { | ||
return Object.entries(data).map(([key, val]) => { | ||
return encodeURIComponent(key) + '=' + encodeURIComponent(val); | ||
}).join('&'); | ||
} | ||
], | ||
timeout: 120000, // 120 seconds | ||
jar: this.cookieJar, | ||
withCredentials: true, | ||
this.requestOptions = mergeDeep1({ | ||
responseType: 'json' | ||
}; | ||
}, mwn.requestDefaults); | ||
@@ -215,8 +234,3 @@ /** | ||
/** | ||
* Util class associated with the bot instance | ||
*/ | ||
this.util = Util(); | ||
// SEMLOG OPTIONS | ||
// set up any semlog options | ||
semlog.updateConfig(this.options.semlog || {}); | ||
@@ -226,2 +240,21 @@ } | ||
/** | ||
* Initialize a bot object. Login to the wiki and fetch editing tokens. | ||
* Also fetches the site data needed for parsing and constructing title objects. | ||
* @param {Object} config - Bot configurations, including apiUrl, and either the | ||
* username and password or the OAuth credentials | ||
* @returns {mwn} bot object | ||
*/ | ||
static async init(config) { | ||
var bot = new mwn(config); | ||
if (bot._usingOAuth()) { | ||
bot.initOAuth(); | ||
await bot.getTokensAndSiteInfo(); | ||
} else { | ||
await bot.loginGetToken(); | ||
} | ||
return bot; | ||
} | ||
/** | ||
* Set and overwrite mwn options | ||
@@ -232,3 +265,3 @@ * | ||
setOptions(customOptions) { | ||
this.options = merge(this.options, customOptions); | ||
this.options = mergeDeep1(this.options, customOptions); | ||
} | ||
@@ -240,3 +273,3 @@ | ||
* | ||
* @param {String} apiUrl - API url to MediaWiki, e.g. https://en.wikipedia.org/w/api.php | ||
* @param {string} apiUrl - API url to MediaWiki, e.g. https://en.wikipedia.org/w/api.php | ||
*/ | ||
@@ -254,3 +287,3 @@ setApiUrl(apiUrl) { | ||
setRequestOptions(customRequestOptions) { | ||
return mergeRequestOptions(this.requestOptions, customRequestOptions); | ||
return mergeDeep1(this.requestOptions, customRequestOptions); | ||
} | ||
@@ -263,3 +296,3 @@ | ||
setDefaultParams(params) { | ||
this.requestOptions.data = merge(this.requestOptions.data, params); | ||
this.options.defaultParams = merge(this.options.defaultParams, params); | ||
} | ||
@@ -273,13 +306,70 @@ | ||
setUserAgent(userAgent) { | ||
if (!this.requestOptions.headers) { | ||
this.requestOptions.headers = {}; | ||
this.options.userAgent = userAgent; | ||
} | ||
/** | ||
* @private | ||
* Determine if we're going to use OAuth for authentication | ||
*/ | ||
_usingOAuth() { | ||
const creds = this.options.OAuthCredentials; | ||
if (typeof creds !== 'object') { | ||
return false; | ||
} | ||
this.requestOptions.headers['User-Agent'] = userAgent; | ||
if (!creds.consumerToken || !creds.consumerSecret || | ||
!creds.accessToken || !creds.accessSecret) { | ||
return false; | ||
} | ||
return true; | ||
} | ||
/** | ||
* Initialize OAuth instance | ||
*/ | ||
initOAuth() { | ||
if (!this._usingOAuth()) { | ||
// without this, the API would return a confusing | ||
// mwoauth-invalid-authorization invalid consumer error | ||
throw new Error('[mwn] Invalid OAuth config'); | ||
} | ||
try { | ||
this.oauth = OAuth({ | ||
consumer: { | ||
key: this.options.OAuthCredentials.consumerToken, | ||
secret: this.options.OAuthCredentials.consumerSecret | ||
}, | ||
signature_method: 'HMAC-SHA1', | ||
// based on example at https://www.npmjs.com/package/oauth-1.0a | ||
hash_function(base_string, key) { | ||
return crypto | ||
.createHmac('sha1', key) | ||
.update(base_string) | ||
.digest('base64'); | ||
} | ||
}); | ||
this.usingOAuth = true; | ||
} catch (err) { | ||
throw new Error('Failed to construct OAuth object. ' + err); | ||
} | ||
} | ||
/** | ||
* @private | ||
* Get OAuth Authorization header | ||
* @param {Object} params | ||
* @returns {Object} | ||
*/ | ||
makeOAuthHeader(params) { | ||
return this.oauth.toHeader(this.oauth.authorize(params, { | ||
key: this.options.OAuthCredentials.accessToken, | ||
secret: this.options.OAuthCredentials.accessSecret | ||
})); | ||
} | ||
/************ CORE REQUESTS ***************/ | ||
/** | ||
* Executes a promisified raw request | ||
* Uses the npm request library | ||
* Executes a raw request | ||
* Uses the axios library | ||
* | ||
@@ -292,15 +382,14 @@ * @param {object} requestOptions | ||
this.counter.total += 1; | ||
this.counter.resolved += 1; | ||
if (!requestOptions.url) { | ||
this.counter.rejected += 1; | ||
return Promise.reject(new Error('No API URL provided!')); | ||
var err = new Error('No URL provided!'); | ||
err.disableRetry = true; | ||
return Promise.reject(err); | ||
} | ||
return axios(requestOptions).then(response => { | ||
this.counter.fulfilled +=1; | ||
return axios(mergeDeep1({}, mwn.requestDefaults, { | ||
method: 'get', | ||
headers: { | ||
'User-Agent': this.options.userAgent | ||
}, | ||
}, requestOptions)).then(response => { | ||
return response.data; | ||
}, error => { | ||
this.counter.rejected +=1; | ||
return Promise.reject(error); | ||
}); | ||
@@ -310,3 +399,2 @@ | ||
/** | ||
@@ -316,11 +404,23 @@ * Executes a request with the ability to use custom parameters and custom | ||
* | ||
* @param {object} params Request Parameters | ||
* @param {object} customRequestOptions Custom request options | ||
* @param {Object} params - API call parameters | ||
* @param {Object} [customRequestOptions={}] - custom axios request options | ||
* | ||
* @returns {Promise} | ||
* @returns {Promise<Object>} | ||
*/ | ||
request(params, customRequestOptions) { | ||
async request(params, customRequestOptions = {}) { | ||
params = merge(this.options.defaultParams, params); | ||
let requestOptions = merge({ | ||
var getOrPost = function(data) { | ||
if (data.action === 'query') { | ||
return 'get'; | ||
} | ||
if (data.action === 'parse' && !data.text) { | ||
return 'get'; | ||
} | ||
return 'post'; | ||
}; | ||
let requestOptions = mergeDeep1({ | ||
url: this.options.apiUrl, | ||
method: getOrPost(params), | ||
@@ -331,5 +431,6 @@ // retryNumber isn't actually used by the API, but this is | ||
}, mergeRequestOptions(this.requestOptions, customRequestOptions || {})); | ||
}, this.requestOptions, customRequestOptions); | ||
requestOptions.data = merge(requestOptions.data, params); | ||
const MULTIPART_THRESHOLD = 8000; | ||
var hasLongFields = false; | ||
@@ -340,17 +441,92 @@ // pre-process params: | ||
// with \x1f. | ||
// Copied from mw.Api().preprocessParameters & refactored to ES6 | ||
Object.entries(requestOptions.data).forEach(([key, val]) => { | ||
// Adapted from mw.Api().preprocessParameters | ||
Object.entries(params).forEach(([key, val]) => { | ||
if (Array.isArray(val)) { | ||
if (!val.join('').includes('|')) { | ||
requestOptions.data[key] = val.join('|'); | ||
params[key] = val.join('|'); | ||
} else { | ||
requestOptions.data[key] = '\x1f' + val.join('\x1f'); | ||
params[key] = '\x1f' + val.join('\x1f'); | ||
} | ||
} else if (val === false || val === undefined) { | ||
delete requestOptions.data[key]; | ||
} | ||
if (val === false || val === undefined) { | ||
delete params[key]; | ||
} else if (val === true) { | ||
params[key] = '1'; // booleans cause error with multipart/form-data requests | ||
} else if (String(params[key]).length > MULTIPART_THRESHOLD) { | ||
// use multipart/form-data if there are large fields, for better performance | ||
hasLongFields = true; | ||
} | ||
}); | ||
if (requestOptions.method === 'post') { | ||
// Shift the token to the end of the query string, to prevent | ||
// incomplete data sent from being accepted meaningfully by the server | ||
if (params.token) { | ||
let token = params.token; | ||
delete params.token; | ||
params.token = token; | ||
} | ||
var contentTypeGiven = customRequestOptions.headers && | ||
customRequestOptions.headers['Content-Type']; | ||
if ((hasLongFields && (!contentTypeGiven || contentTypeGiven === 'mulipart/form-data')) || contentTypeGiven === 'multipart/form-data') { | ||
// console.log('sending multipart POST request for action=' + params.action); | ||
// use multipart/form-data | ||
var form = new formData(); | ||
for (let [key, val] of Object.entries(params)) { | ||
if (val.stream) { | ||
form.append(key, val.stream, val.name); | ||
} else { | ||
form.append(key, val); | ||
} | ||
} | ||
requestOptions.data = form; | ||
requestOptions.headers = await new Promise((resolve, reject) => { | ||
form.getLength((err, length) => { | ||
if (err) { | ||
reject('Failed to get length of stream: ' + err); | ||
} | ||
resolve({ | ||
...requestOptions.headers, | ||
...form.getHeaders(), | ||
'Content-Length': length | ||
}); | ||
}); | ||
}); | ||
} else { | ||
// console.log('sending POST request for action=' + params.action); | ||
// use application/x-www-form-urlencoded (default) | ||
// requestOptions.data = params; | ||
requestOptions.data = Object.entries(params).map(([key, val]) => { | ||
return encodeURIComponent(key) + '=' + encodeURIComponent(val); | ||
}).join('&'); | ||
} | ||
} else { | ||
// console.log('sending GET request for action=' + params.action); | ||
// axios takes care of stringifying to URL query string | ||
requestOptions.params = params; | ||
} | ||
if (this.usingOAuth) { | ||
// OAuth authentication | ||
requestOptions.headers = { | ||
...requestOptions.headers, | ||
...this.makeOAuthHeader({ | ||
url: requestOptions.url, | ||
method: requestOptions.method, | ||
data: requestOptions.data instanceof formData ? {} : params | ||
}) | ||
}; | ||
} else { | ||
// BotPassword authentication | ||
requestOptions.jar = this.cookieJar; | ||
requestOptions.withCredentials = true; | ||
} | ||
return this.rawRequest(requestOptions).then((response) => { | ||
if (typeof response !== 'object') { | ||
if (params.format !== 'json') { | ||
throw new Error('must use format=json'); | ||
} | ||
let err = new Error('invalidjson: No valid JSON response'); | ||
@@ -360,37 +536,65 @@ err.code = 'invalidjson'; | ||
err.response = response; | ||
return Promise.reject(err) ; | ||
return Promise.reject(err); | ||
} | ||
const refreshTokenAndRetry = () => { | ||
return Promise.all( | ||
[this.getTokenType(params.action), this.getTokens()] | ||
).then(([tokentype]) => { | ||
if (!tokentype || !this.state[tokentype + 'token']) { | ||
return this.dieWithError(response, requestOptions); | ||
} | ||
var token = this.state[ tokentype + 'token' ]; | ||
params.token = token; | ||
return this.request(params, customRequestOptions); | ||
}); | ||
}; | ||
// See https://www.mediawiki.org/wiki/API:Errors_and_warnings#Errors | ||
if (response.error) { | ||
// This will not work if the token type to be used is defined by an | ||
// extension, and not a part of mediawiki core | ||
if (response.error.code === 'badtoken') { | ||
return Promise.all( | ||
[this.getTokenType(requestOptions.data.action), this.getTokens()] | ||
).then(([tokentype]) => { | ||
var token = this.state[ (tokentype || 'csrf') + 'token' ]; | ||
requestOptions.data.token = token; | ||
return this.request({}, requestOptions); | ||
}); | ||
} | ||
if (requestOptions.retryNumber < this.options.maxRetries) { | ||
customRequestOptions.retryNumber = requestOptions.retryNumber + 1; | ||
// Handle maxlag, see https://www.mediawiki.org/wiki/Manual:Maxlag_parameter | ||
if (response.error.code === 'maxlag' && requestOptions.retryNumber < this.options.maxlagMaxRetries) { | ||
log(`[W] Encountered maxlag error, waiting for ${this.options.maxlagPause/1000} seconds before retrying`); | ||
return sleep(this.options.maxlagPause).then(() => { | ||
requestOptions.retryNumber++; | ||
return this.request({}, requestOptions); | ||
}); | ||
switch (response.error.code) { | ||
// This will not work if the token type to be used is defined by an | ||
// extension, and not a part of mediawiki core | ||
case 'badtoken': | ||
log(`[W] Encountered badtoken error, fetching new token and retrying`); | ||
return refreshTokenAndRetry(); | ||
case 'readonly': | ||
case 'maxlag': | ||
// Handle maxlag, see https://www.mediawiki.org/wiki/Manual:Maxlag_parameter | ||
log(`[W] Encountered ${response.error.code} error, waiting for ${this.options.retryPause/1000} seconds before retrying`); | ||
return sleep(this.options.retryPause).then(() => { | ||
return this.request(params, customRequestOptions); | ||
}); | ||
case 'assertbotfailed': | ||
case 'assertuserfailed': | ||
// this shouldn't have happened if we're using OAuth | ||
if (this.usingOAuth) { | ||
return this.dieWithError(response, requestOptions); | ||
} | ||
// Possibly due to session loss: retry after logging in again | ||
log(`[W] Received ${response.error.code}, attempting to log in and retry`); | ||
return this.login().then(() => { | ||
if (params.token) { | ||
return refreshTokenAndRetry(); | ||
} else { | ||
return this.request(params, customRequestOptions); | ||
} | ||
}); | ||
default: | ||
return this.dieWithError(response, requestOptions); | ||
} | ||
} else { | ||
return this.dieWithError(response, requestOptions); | ||
} | ||
let err = new Error(response.error.code + ': ' + response.error.info); | ||
// Enhance error object with additional information | ||
err.errorResponse = true; | ||
err.code = response.error.code; | ||
err.info = response.error.info; | ||
err.response = response; | ||
err.request = requestOptions; | ||
return Promise.reject(err); | ||
} | ||
@@ -400,2 +604,15 @@ | ||
}, error => { | ||
if (!error.disableRetry && requestOptions.retryNumber < this.options.maxRetries) { | ||
// error might be transient, give it another go! | ||
log(`[W] Encountered ${error}, retrying in ${this.options.retryPause/1000} seconds`); | ||
customRequestOptions.retryNumber = requestOptions.retryNumber + 1; | ||
return sleep(this.options.retryPause).then(() => { | ||
return this.request(params, customRequestOptions); | ||
}); | ||
} | ||
error.request = requestOptions; | ||
return Promise.reject(error); | ||
}); | ||
@@ -405,3 +622,15 @@ | ||
/** @private */ | ||
dieWithError(response, requestOptions) { | ||
var err = new Error(response.error.code + ': ' + response.error.info); | ||
// Enhance error object with additional information | ||
err.errorResponse = true; | ||
err.code = response.error.code; | ||
err.info = response.error.info; | ||
err.response = response; | ||
err.request = requestOptions; | ||
return Promise.reject(err); | ||
} | ||
/************** CORE FUNCTIONS *******************/ | ||
@@ -415,3 +644,3 @@ | ||
* | ||
* @param {object} [loginOptions] - object containing the apiUrl, username, | ||
* @param {Object} [loginOptions] - object containing the apiUrl, username, | ||
* and password | ||
@@ -448,3 +677,3 @@ * | ||
log('[E] [mwn] Login failed with invalid response: ' + loginString); | ||
return Promise.reject(err) ; | ||
return Promise.reject(err); | ||
} | ||
@@ -480,3 +709,3 @@ this.state = merge(this.state, response.query.tokens); | ||
log('[E] [mwn] Login failed: ' + loginString); | ||
return Promise.reject(err) ; | ||
return Promise.reject(err); | ||
@@ -489,3 +718,3 @@ }); | ||
* Log out of the account | ||
* @returns {Promise} - resolved with an empty object if successful | ||
* @returns {Promise<void>} | ||
*/ | ||
@@ -496,3 +725,8 @@ logout() { | ||
token: this.csrfToken | ||
}); // returns an empty response if successful | ||
}).then(() => { // returns an empty response if successful | ||
this.loggedIn = false; | ||
this.cookieJar.removeAllCookiesSync(); | ||
this.state = {}; | ||
this.csrfToken = '%notoken%'; | ||
}); | ||
} | ||
@@ -504,3 +738,3 @@ | ||
* where mwn needs to be used without logging in. | ||
* @returns {Promise} | ||
* @returns {Promise<void>} | ||
*/ | ||
@@ -527,2 +761,3 @@ getSiteInfo() { | ||
}).then((response) => { | ||
// console.log('getTokens response:', response); | ||
if (response.query && response.query.tokens) { | ||
@@ -534,3 +769,3 @@ this.csrfToken = response.query.tokens.csrftoken; | ||
err.response = response; | ||
return Promise.reject(err) ; | ||
return Promise.reject(err); | ||
} | ||
@@ -551,2 +786,25 @@ }); | ||
/** | ||
* Get the tokens and siteinfo in one request | ||
* @returns {Promise<void>} | ||
*/ | ||
getTokensAndSiteInfo() { | ||
return this.request({ | ||
action: 'query', | ||
meta: 'siteinfo|tokens', | ||
siprop: 'general|namespaces|namespacealiases', | ||
type: 'csrf|createaccount|login|patrol|rollback|userrights|watch' | ||
}).then(response => { | ||
Title.processNamespaceData(response); | ||
if (response.query && response.query.tokens) { | ||
this.csrfToken = response.query.tokens.csrftoken; | ||
this.state = merge(this.state, response.query.tokens); | ||
} else { | ||
let err = new Error('Could not get token'); | ||
err.response = response; | ||
return Promise.reject(err); | ||
} | ||
}); | ||
} | ||
/** | ||
* Get type of token to be used with an API action | ||
@@ -569,7 +827,7 @@ * @param {string} action - API action parameter | ||
* @param loginOptions | ||
* @returns {Promise<string>} | ||
* @returns {Promise<void>} | ||
*/ | ||
loginGetToken(loginOptions) { | ||
return this.login(loginOptions).then(() => { | ||
return this.getCsrfToken(); | ||
return this.getTokens(); | ||
}); | ||
@@ -580,3 +838,3 @@ } | ||
* Get the wiki's server time | ||
* @returns {string} | ||
* @returns {Promise<string>} | ||
*/ | ||
@@ -592,5 +850,60 @@ getServerTime() { | ||
/** | ||
* Fetch and parse a JSON wikipage | ||
* @param {string} title - page title | ||
* @returns {Promise<Object>} parsed JSON object | ||
*/ | ||
parseJsonPage(title) { | ||
return this.read(title).then(data => { | ||
try { | ||
return JSON.parse(data.revisions[0].content); | ||
} catch(e) { | ||
return Promise.reject('invalidjson'); | ||
} | ||
}); | ||
} | ||
/***************** HELPER FUNCTIONS ******************/ | ||
/** | ||
* Reads the content / and meta-data of one (or many) pages | ||
* | ||
* @param {string|string[]|number|number[]} titles - for multiple pages use an array | ||
* @param {Object} [options] | ||
* | ||
* @returns {Promise} | ||
*/ | ||
read(titles, options) { | ||
return this.massQuery(merge({ | ||
action: 'query', | ||
prop: 'revisions', | ||
rvprop: 'content|timestamp', | ||
redirects: '1' | ||
}, makeTitles(titles), options), | ||
typeof titles[0] === 'number' ? 'pageids' : 'titles').then(jsons => { | ||
var data = jsons.reduce((data, json) => { | ||
return data.concat(json.query.pages); | ||
}, []); | ||
return data.length === 1 ? data[0] : data; | ||
}); | ||
} | ||
async *readGen(titles, options) { | ||
let massQueryResponses = this.massQueryGen(merge({ | ||
action: 'query', | ||
prop: 'revisions', | ||
rvprop: 'content', | ||
redirects: '1' | ||
}, makeTitles(titles), options), | ||
typeof titles[0] === 'number' ? 'pageids' : 'titles'); | ||
for await (let response of massQueryResponses) { | ||
if (response && response.query && response.query.pages) { | ||
for (let pg of response.query.pages) { | ||
yield pg; | ||
} | ||
} | ||
} | ||
} | ||
// adapted from mw.Api().edit | ||
@@ -604,7 +917,14 @@ /** | ||
* those. | ||
* @param {number} [conflictRetries=2] - max number of times to retry edit on | ||
* encountering an edit conflict (default 2) | ||
* @param {Object} [editConfig] - Overridden edit options. Available options: | ||
* conflictRetries, suppressNochangeWarning, exclusionRegex | ||
* @config conflictRetries - maximum number of times to retry edit after encountering edit | ||
* conflicts. | ||
* @config suppressNochangeWarning - don't show the warning when no change is actually | ||
* made to the page on a successful edit | ||
* @config exclusionRegex - don't edit if the page text matches this regex. Used for bot | ||
* per-page exclusion compliance. | ||
* @return {Promise<Object>} Edit API response | ||
*/ | ||
edit(title, transform, conflictRetries=2) { | ||
edit(title, transform, editConfig) { | ||
editConfig = editConfig || this.options.editConfig; | ||
@@ -636,2 +956,6 @@ var basetimestamp, curtimestamp; | ||
if (editConfig.exclusionRegex && editConfig.exclusionRegex.test(revision.content)) { | ||
return Promise.reject('bot-denied'); | ||
} | ||
return transform({ | ||
@@ -659,6 +983,13 @@ timestamp: revision.timestamp, | ||
}).then(data => { | ||
if (data.edit && data.edit.nochange && !editConfig.suppressNochangeWarning) { | ||
log(`[W] No change from edit to ${data.edit.title}`); | ||
} | ||
if (!data.edit) { | ||
log(`[W] Unusual API success response: ` + JSON.stringify(data,undefined,2)); | ||
} | ||
return data.edit; | ||
}, errorCode => { | ||
if (errorCode === 'editconflict' && conflictRetries > 0) { | ||
return this.edit(title, transform, conflictRetries - 1); | ||
if (errorCode === 'editconflict' && editConfig.conflictRetries > 0) { | ||
editConfig.conflictRetries--; | ||
return this.edit(title, transform, editConfig); | ||
} | ||
@@ -703,3 +1034,3 @@ }); | ||
summary: summary, | ||
createonly: true, | ||
createonly: '1', | ||
token: this.csrfToken | ||
@@ -728,24 +1059,2 @@ }, options)).then(data => data.edit); | ||
/** | ||
* Reads the content / and meta-data of one (or many) pages | ||
* | ||
* @param {string|string[]|number|number[]} titles - for multiple pages use an array | ||
* @param {object} [options] | ||
* | ||
* @returns {Promise} | ||
*/ | ||
read(titles, options) { | ||
return this.massQuery(merge({ | ||
action: 'query', | ||
prop: 'revisions', | ||
rvprop: 'content', | ||
redirects: '1' | ||
}, makeTitles(titles), options), | ||
typeof titles[0] === 'number' ? 'pageids' : 'titles').then(jsons => { | ||
var data = jsons.reduce((data, json) => { | ||
return data.concat(json.query.pages); | ||
}, []); | ||
return data.length === 1 ? data[0] : data; | ||
}); | ||
} | ||
@@ -844,3 +1153,38 @@ /** | ||
/** | ||
* Upload an image from a the local disk to the wiki. | ||
* If a file with the same name exists, it will be over-written. | ||
* @param {string} filepath | ||
* @param {string} title | ||
* @param {string} text | ||
* @param {object} options | ||
* @returns {Promise<Object>} | ||
*/ | ||
upload(filepath, title, text, options) { | ||
return this.request(merge({ | ||
action: 'upload', | ||
file: { | ||
stream: fs.createReadStream(filepath), | ||
name: path.basename(filepath) | ||
}, | ||
filename: title, | ||
text: text, | ||
ignorewarnings: '1', | ||
token: this.csrfToken | ||
}, options), { | ||
headers: { | ||
'Content-Type': 'multipart/form-data' | ||
} | ||
}).then(data => { | ||
if (data.upload.warnings) { | ||
log('[W] The API returned warnings while uploading to ' + title + ':'); | ||
log(data.upload.warnings); | ||
} | ||
return data.upload; | ||
}); | ||
} | ||
/** | ||
* Upload an image from a web URL to the wiki | ||
@@ -853,2 +1197,3 @@ * If a file with the same name exists, it will be over-written, | ||
* @param {Object} options | ||
* @returns {Promise<Object>} | ||
*/ | ||
@@ -864,2 +1209,6 @@ uploadFromUrl(url, title, text, options) { | ||
}, options)).then(data => { | ||
if (data.upload.warnings) { | ||
log('[W] The API returned warnings while uploading to ' + title + ':'); | ||
log(data.upload.warnings); | ||
} | ||
return data.upload; | ||
@@ -917,13 +1266,7 @@ }); | ||
rollback(page, user, params) { | ||
return this.request({ | ||
action: 'query', | ||
meta: 'tokens', | ||
type: 'rollback' | ||
}).then(data => { | ||
return this.request(merge({ | ||
action: 'rollback', | ||
user: user, | ||
token: data.query.tokens.rollbacktoken | ||
}, makeTitle(page), params)); | ||
}).then(data => { | ||
return this.request(merge({ | ||
action: 'rollback', | ||
user: user, | ||
token: this.state.rollbacktoken | ||
}, makeTitle(page), params)).then(data => { | ||
return data.rollback; | ||
@@ -985,2 +1328,23 @@ }); | ||
/** | ||
* Search the wiki. | ||
* @param {string} searchTerm | ||
* @param {number} limit | ||
* @param {("size"|"timestamp"|"worcount"|"snippet"|"redirectitle"|"sectiontitle"| | ||
* "redirectsnippet"|"titlesnippet"|"sectionsnippet"|"categorysnippet")[]} props | ||
* @param {Object} otherParams | ||
* @returns {Promise<Object>} | ||
*/ | ||
search(searchTerm, limit, props, otherParams) { | ||
return this.request(merge({ | ||
action: 'query', | ||
list: 'search', | ||
srsearch: searchTerm, | ||
srlimit: limit, | ||
srprop: props || 'size|worcount|timestamp', | ||
}, otherParams)).then(data => { | ||
return data.query.search; | ||
}); | ||
} | ||
/************* BULK PROCESSING FUNCTIONS ************/ | ||
@@ -1015,2 +1379,19 @@ | ||
/** | ||
* Generator to iterate through API response continuations. | ||
* @generator | ||
* @param {Object} query | ||
* @param {number} [limit=10] | ||
* @yields {Object} a single page of the response | ||
*/ | ||
async *continuedQueryGen(query, limit=10) { | ||
let response = { continue: {} }; | ||
for (let i = 0; i < limit; i++) { | ||
if (response.continue) { | ||
response = await this.request(merge(query, response.continue)); | ||
yield response; | ||
} | ||
} | ||
} | ||
/** | ||
* Function for using API action=query with more than 50/500 items in multi- | ||
@@ -1031,2 +1412,5 @@ * input fields. | ||
* | ||
* The API calls are made via POST instead of GET to avoid potential 414 (URI | ||
* too long) errors. | ||
* | ||
* This assumes that the user has the apihighlimits user right, available to bots | ||
@@ -1063,3 +1447,3 @@ * and sysops by default. If your account does not have the right, you MUST set | ||
query[batchFieldName] = batches[idx]; | ||
this.request(query).then(response => { | ||
this.request(query, { method: 'post' }).then(response => { | ||
responses[idx] = response; | ||
@@ -1081,2 +1465,19 @@ }, err => { | ||
/** | ||
* Generator version of massQuery(). Iterate through pages of API results. | ||
* @param {Object} query | ||
* @param {string} batchFieldName | ||
*/ | ||
async *massQueryGen(query, batchFieldName='titles') { | ||
var batchValues = query[batchFieldName]; | ||
var limit = this.options.hasApiHighLimit ? 500 : 50; | ||
var batches = arrayChunk(batchValues, limit); | ||
var numBatches = batches.length; | ||
for (let i = 0; i < numBatches; i++) { | ||
query[batchFieldName] = batches[i]; | ||
yield await this.request(query, { method: 'post' }); | ||
} | ||
} | ||
/** | ||
* Execute an asynchronous function on a large number of pages (or other arbitrary | ||
@@ -1298,33 +1699,71 @@ * items). Designed for working with promises. | ||
/****************** UTILITIES *****************/ | ||
/** | ||
* Prints status information about a completed request | ||
* | ||
* @param status | ||
* @param currentCounter | ||
* @param totalCounter | ||
* @param operation | ||
* @param pageName | ||
* @param reason | ||
* Gets ORES predictions from revision IDs | ||
* @param {string} endpointUrl | ||
* @param {string[]|string} models | ||
* @param {string[]|number[]|string|number} revisions - revision ID(s) | ||
*/ | ||
static logStatus(status, currentCounter, totalCounter, operation, pageName, reason) { | ||
oresQueryRevisions(endpointUrl, models, revisions) { | ||
var response = {}; | ||
var chunks = arrayChunk( | ||
(revisions instanceof Array) ? revisions : [ revisions ], | ||
50 | ||
); | ||
return this.seriesBatchOperation(chunks, (chunk) => { | ||
return this.rawRequest({ | ||
method: 'get', | ||
url: endpointUrl, | ||
params: { | ||
models: models.join('|'), | ||
revids: chunk.join('|') | ||
}, | ||
responseType: 'json' | ||
}).then(data => { | ||
Object.assign(response, Object.values(data)[0].scores); | ||
}); | ||
}, 0, 2).then(() => { | ||
return response; | ||
}); | ||
} | ||
operation = operation || ''; | ||
} | ||
if (operation) { | ||
operation = ' [' + operation.toUpperCase() + ']'; | ||
operation = (operation + ' ').substring(0, 12); // Right space padding: http://stackoverflow.com/a/24398129 | ||
} | ||
mwn.requestDefaults = { | ||
headers: { | ||
'Accept-Encoding': 'gzip' | ||
}, | ||
reason = reason || ''; | ||
if (reason) { | ||
reason = ' (' + reason + ')'; | ||
} | ||
// keep-alive pools and reuses TCP connections, for better performance | ||
httpAgent: new http.Agent({ keepAlive: true }), | ||
httpsAgent: new https.Agent({ keepAlive: true }), | ||
log(status + '[' + semlog.pad(currentCounter, 4) + '/' + semlog.pad(totalCounter, 4) + ']' + operation + pageName + reason); | ||
timeout: 60000, // 60 seconds | ||
}; | ||
// Bind static utilities | ||
Object.assign(mwn, static_utils); | ||
// Expose semlog | ||
mwn.log = log; | ||
/**** Private utilities ****/ | ||
/**
 * Tell whether a value looks like a promises-A+ promise (a "thenable").
 * Logic taken from https://www.npmjs.com/package/is-promise
 * @param {*} obj
 * @returns {boolean} true for any non-null object or function with a callable `then`
 */
var ispromise = function (obj) {
	if (!obj) {
		return false;
	}
	const kind = typeof obj;
	if (kind !== 'object' && kind !== 'function') {
		return false;
	}
	return typeof obj.then === 'function';
};
/**
 * Check whether a value is a plain object, i.e. one created by `{}`,
 * `new Object()` or `Object.create(null)`.
 * From https://github.com/sindresorhus/is-plain-obj/blob/master/index.js
 * @param {*} value
 * @returns {boolean}
 */
var isplainobject = function(value) {
	// Rejects primitives, null, arrays, dates, functions, etc.
	if (Object.prototype.toString.call(value) !== '[object Object]') {
		return false;
	}
	// Class instances report [object Object] too, so also inspect the prototype.
	const prototype = Object.getPrototypeOf(value);
	return prototype === null || prototype === Object.prototype;
};
/** | ||
@@ -1344,20 +1783,35 @@ * Simple wrapper around Object.assign to merge objects. null and undefined | ||
/** | ||
* Merge objects deeply to 1 level. Object properties like params, form, | ||
* header get merged. But not any object properties within them. | ||
* Merge objects deeply to 1 level. Object properties like params, data, | ||
* headers get merged. But not any object properties within them. | ||
* Arrays are not merged, but over-written (as if it were a primitive) | ||
* The original object is mutated and returned. | ||
* The first object is mutated and returned. | ||
* @param {...Object} - any number of objects | ||
* @returns {Object} | ||
*/ | ||
var mergeRequestOptions = function(options, customOptions) { | ||
Object.entries(customOptions).forEach(([key, val]) => { | ||
if (typeof val === 'object' && !Array.isArray(val)) { | ||
options[key] = merge(options[key], val); | ||
// this can't be written as Object.assign(options[key], val) | ||
// as options[key] could be undefined | ||
} else { | ||
options[key] = val; | ||
var mergeDeep1 = function(...objects) { | ||
let args = [...objects].filter(e => e); // skip null/undefined values | ||
for (let options of args.slice(1)) { | ||
for (let [key, val] of Object.entries(options)) { | ||
if (isplainobject(val)) { | ||
args[0][key] = merge(args[0][key], val); | ||
// this can't be written as Object.assign(args[0][key], val) | ||
// as args[0][key] could be undefined | ||
} else { | ||
args[0][key] = val; | ||
} | ||
} | ||
}); | ||
return options; | ||
} | ||
return args[0]; | ||
}; | ||
/**
 * Split an array into consecutive slices of at most `size` elements.
 * The input array is not modified.
 * @param {Array} arr
 * @param {number} size
 * @returns {Array[]} the slices, in order; the last one may be shorter
 */
var arrayChunk = function(arr, size) {
	const chunkCount = Math.ceil(arr.length / size);
	const chunks = new Array(chunkCount);
	for (const idx of chunks.keys()) {
		chunks[idx] = arr.slice(idx * size, (idx + 1) * size);
	}
	return chunks;
};
/** | ||
@@ -1368,3 +1822,3 @@ * Promisified version of setTimeout | ||
var sleep = function(duration) { | ||
return new Promise((resolve) => { | ||
return new Promise(resolve => { | ||
setTimeout(resolve, duration); | ||
@@ -1392,2 +1846,2 @@ }); | ||
module.exports = Bot; | ||
module.exports = mwn; |
135
src/page.js
@@ -11,2 +11,3 @@ module.exports = function(bot) { | ||
} | ||
this.data = {}; | ||
} | ||
@@ -39,3 +40,6 @@ | ||
"prop": "wikitext" | ||
}).then(data => data.parse.wikitext); | ||
}).then(data => { | ||
this.data.text = data.parse.wikitext; | ||
return data.parse.wikitext; | ||
}); | ||
} | ||
@@ -83,12 +87,22 @@ | ||
// backlinks() { | ||
// XXX: FIX UP continuedQuery first | ||
// return bot.continuedQuery({ | ||
// "action": "query", | ||
// "prop": "linkshere", | ||
// "titles": this.toString(), | ||
// "lhprop": "title", | ||
// "lhlimit": "max" | ||
// }).then(data => data.query.pages[0].linkshere.map(pg => pg.title)); | ||
//} | ||
/** | ||
* Get list of pages linking to this page | ||
* @returns {Promise<String[]>} | ||
*/ | ||
backlinks() { | ||
return bot.continuedQuery({ | ||
"action": "query", | ||
"prop": "linkshere", | ||
"titles": this.toString(), | ||
"lhprop": "title", | ||
"lhlimit": "max" | ||
}).then(jsons => { | ||
var pages = jsons.reduce((pages, json) => pages.concat(json.query.pages), []); | ||
var page = pages[0]; | ||
if (page.missing) { | ||
return Promise.reject('missingarticle'); | ||
} | ||
return page.linkshere.map(pg => pg.title); | ||
}); | ||
} | ||
@@ -137,4 +151,78 @@ | ||
/** | ||
* Check if page is redirect or not | ||
* @returns {Promise<boolean>} | ||
*/ | ||
isRedirect() { | ||
return this.getRedirectTarget().then(target => { | ||
return this.toText() !== target; | ||
}); | ||
} | ||
/** | ||
* Get redirect target. | ||
* Returns the same page name if the page is not a redirect. | ||
* @returns {Promise<string>} | ||
*/ | ||
getRedirectTarget() { | ||
if (this.data.text) { | ||
var target = /^\s*#redirect \[\[(.*?)\]\]/.exec(this.data.text); | ||
if (!target) { | ||
return this.toText(); | ||
} | ||
return Promise.resolve(new bot.title(target[1]).toText()); | ||
} | ||
return bot.request({ | ||
action: 'query', | ||
titles: this.toString(), | ||
redirects: '1', | ||
}).then(data => { | ||
var page = data.query.pages[0]; | ||
if (page.missing) { | ||
return Promise.reject('missingarticle'); | ||
} | ||
return page.title; | ||
}); | ||
} | ||
/** | ||
* Get username of the page creator | ||
* @returns {Promise<string>} | ||
*/ | ||
getCreator() { | ||
return bot.request({ | ||
action: 'query', | ||
titles: this.toString(), | ||
prop: 'revisions', | ||
rvprop: 'user', | ||
rvlimit: 1, | ||
rvdir: 'newer' | ||
}).then(data => { | ||
var page = data.query.pages[0]; | ||
if (page.missing) { | ||
return Promise.reject('missingarticle'); | ||
} | ||
return page.revisions[0].user; | ||
}); | ||
} | ||
getDeletingAdmin() { | ||
return bot.request({ | ||
action: "query", | ||
list: "logevents", | ||
leaction: "delete/delete", | ||
letitle: this.toString(), | ||
lelimit: 1 | ||
}).then(data => { | ||
var logs = data.query.logevents; | ||
if (logs.length === 0) { | ||
return null; | ||
} | ||
return logs[0].user; | ||
}); | ||
} | ||
/** | ||
* Get the edit history of the page | ||
* @param {Array} props - revision properties to fetch, by default content is | ||
* @param {revisionprop[]} props - revision properties to fetch, by default content is | ||
* excluded | ||
@@ -155,2 +243,6 @@ * @param {number} [limit=50] - number of revisions to fetch data about | ||
}, customOptions)).then(data => { | ||
var page = data.query.pages[0]; | ||
if (page.missing) { | ||
return Promise.reject('missingarticle'); | ||
} | ||
return data.query.pages[0].revisions; | ||
@@ -162,3 +254,3 @@ }); | ||
* Get the page logs. | ||
* @param {Array} props - data about log entries to fetch | ||
* @param {logprop[]} props - data about log entries to fetch | ||
* @param {number} limit - max number of log entries to fetch | ||
@@ -201,2 +293,6 @@ * @param {string} type - type of log to fetch, can either be an letype or leaction | ||
save(text, summary, options) { | ||
return bot.save(this.toString(), text, summary, options); | ||
} | ||
newSection(header, message, additionalParams) { | ||
@@ -228,1 +324,14 @@ return bot.newSection(this.toString(), header, message, additionalParams); | ||
}; | ||
// Type definitions for JSDocs | ||
/** | ||
* @typedef {"content" | "timestamp" | "user" | "comment" | "parsedcomment" | "ids" | "flags" | | ||
* "size" | "tags" | "userid" | "contentmodel"} revisionprop | ||
*/ | ||
/** | ||
* @typedef {"type" | "user" | "comment" | "details" | "timestamp" | "title" | "parsedcomment" | | ||
* "ids" | "tags" | "userid"} logprop | ||
*/ |
@@ -232,3 +232,3 @@ /** | ||
rInvalid = new RegExp( | ||
'[^' + Title.legaltitlechars + ']' + | ||
'[^' + " %!\"$&'()*,\\-./0-9:;=?@A-Z\\\\\\^_`a-z~+\\u0080-\\uFFFF" + ']' + | ||
// URL percent encoding sequences interfere with the ability | ||
@@ -235,0 +235,0 @@ // to round-trip titles -- you can't link to them consistently. |
@@ -14,2 +14,4 @@ module.exports = function(bot) { | ||
get userpage() { | ||
// User: namespace name will work across all MW sites | ||
// as it is a canonical namespace name | ||
return new bot.page('User:' + this.username); | ||
@@ -91,3 +93,3 @@ } | ||
text: message, | ||
token: bot.csrfToken | ||
token: this.csrfToken | ||
}).then(data => data.emailuser); | ||
@@ -94,0 +96,0 @@ } |
@@ -1,8 +0,8 @@ | ||
module.exports = function(bot) { | ||
module.exports = function (bot) { | ||
/** | ||
* Class for some basic wikitext parsing, involving | ||
* links, files, categories and templates. | ||
* | ||
* For more advanced and sophisticated wikitext parsing, use can | ||
* Class for some basic wikitext parsing, involving | ||
* links, files, categories, templates and simple tables. | ||
* | ||
* For more advanced and sophisticated wikitext parsing, use | ||
* mwparserfromhell <https://github.com/earwig/mwparserfromhell> | ||
@@ -12,2 +12,5 @@ * implemented in python (which you can use within node.js using | ||
* recognize localised namespaces and wiki-specific configs. | ||
* | ||
* This class is for methods for parsing wikitext, for the | ||
* static methods for creating wikitext, see static_utils.js. | ||
*/ | ||
@@ -18,3 +21,12 @@ class Wikitext { | ||
constructor(wikitext) { | ||
if (typeof wikitext !== 'string') { | ||
throw new Error('non-string constructor for wikitext class'); | ||
} | ||
this.text = wikitext; | ||
this.unbinder = { | ||
counter: 0, | ||
history: {}, | ||
prefix: '%UNIQ::' + Math.random() + '::', | ||
postfix: '::UNIQ%' | ||
}; | ||
} | ||
@@ -31,7 +43,7 @@ | ||
var stack = new Stack(); | ||
for (let i=0; i<n; i++) { | ||
if (this.text[i] === '[' && this.text[i+1] === '[') { | ||
stack.push({startIdx: i }); | ||
for (let i = 0; i < n; i++) { | ||
if (this.text[i] === '[' && this.text[i + 1] === '[') { | ||
stack.push({ startIdx: i }); | ||
i++; | ||
} else if (this.text[i] === ']' && this.text[i+1] === ']' && stack.top()) { | ||
} else if (this.text[i] === ']' && this.text[i + 1] === ']' && stack.top()) { | ||
stack.top().endIdx = i + 1; | ||
@@ -59,24 +71,22 @@ processLink(this, stack.top().startIdx, stack.top().endIdx); | ||
} | ||
* @param {number} [count] - maximum number of templates to parse (default infinite) | ||
* @return Template[] | ||
* @param {string} wikitext | ||
* @param {{recursive: boolean, namePredicate: function, templatePredicate: function, | ||
* count: number}} config | ||
* @config {boolean} recursive - also parse templates within subtemplates | ||
* @config {function} namePredicate - include template in result only if its name matches this predicate | ||
* More efficient than templatePredicate as the template parameters aren't parsed if name didn't match. | ||
* @config {function} templatePredicate - include template in result only if it matches this predicate | ||
* @config {number} count - max number of templates to be parsed. If recursive is set true, note that | ||
* templates are parsed breadth-first, not depth-first. | ||
* @returns {Template[]} | ||
*/ | ||
parseTemplates(count) { | ||
return this.templates = parseTemplates(this.text, false, count); | ||
parseTemplates(config) { | ||
return this.templates = parseTemplates(this.text, config); | ||
} | ||
/** | ||
* Also parse templates that occur within other templates, rather than just top-level templates. | ||
* @param {number} [depth=true] - specify a number to limit recursive parsing to a the given recursion | ||
* depth. For infinite depth, specify `true` (default). Eg. with recursive=1, all templates and | ||
* sub-templates will be parsed, but not the templates within the sub-templates | ||
*/ | ||
parseTemplatesRecursive(depth) { | ||
return this.templates = parseTemplates(this.text, depth || true); | ||
} | ||
/** | ||
* Remove a template, link, file or category from the text | ||
* CAUTION: If an entity with the very same wikitext exists earlier in the text, | ||
* CAUTION: If an entity with the very same wikitext exists earlier in the text, | ||
* that one will be removed instead. | ||
* @param {Object|Template} entity - anything with a wikitext attribute | ||
* @param {Object|Template} entity - anything with a wikitext attribute | ||
* and end index | ||
@@ -88,2 +98,42 @@ */ | ||
/** | ||
* Temporarily hide a part of the string while processing the rest of it. | ||
* | ||
* eg. let u = new bot.wikitext("Hello world <!-- world --> world"); | ||
* u.unbind('<!--','-->'); | ||
* u.content = u.content.replace(/world/g, 'earth'); | ||
* u.rebind(); // gives "Hello earth <!-- world --> earth" | ||
* | ||
* Text within the 'unbinded' part (in this case, the HTML comment) remains intact | ||
* unbind() can be called multiple times to unbind multiple parts of the string. | ||
* | ||
* Attribution: https://en.wikipedia.org/wiki/MediaWiki:Gadget-morebits.js (cc-by-sa 3.0/GFDL) | ||
* @param {string} prefix | ||
* @param {string} postfix | ||
*/ | ||
unbind(prefix, postfix) { | ||
let re = new RegExp(prefix + '([\\s\\S]*?)' + postfix, 'g'); | ||
this.text = this.text.replace(re, match => { | ||
let current = this.unbinder.prefix + this.unbinder.counter + this.unbinder.postfix; | ||
this.unbinder.history[current] = match; | ||
++this.unbinder.counter; | ||
return current; | ||
}); | ||
} | ||
/** | ||
* Rebind after unbinding. | ||
* @returns {string} The output | ||
*/ | ||
rebind() { | ||
let content = this.text; | ||
content.self = this; | ||
for (let [current, replacement] of Object.entries(this.unbinder.history)) { | ||
content = content.replace(current, replacement); | ||
} | ||
this.text = content; | ||
return this.text; | ||
} | ||
/** Get the updated text @returns {string} */ | ||
@@ -104,35 +154,128 @@ getText() { | ||
/** | ||
* Get wikitext for a new link | ||
* @param {string|bot.title} target | ||
* @param {string} [displaytext] | ||
* Simple table parser. | ||
* Parses tables provided: | ||
* 1. It doesn't have any merged or joined cells. | ||
* 2. It doesn't use any templates to produce any table markup. | ||
* 3. Further restrictions may apply. | ||
* | ||
* Tables generated via mwn.table() class are intended to be parsable. | ||
* | ||
* This method throws when it finds an inconsistency (rather than silently | ||
* cause undesired behaviour). | ||
* | ||
* @param {string} text | ||
* @returns {Object[]} - each object in the returned array represents a row, | ||
* with its keys being column names, and values the cell content | ||
*/ | ||
static link(target, displaytext) { | ||
if (target instanceof bot.title) { | ||
return '[[' + target.toText() + | ||
(target.fragment ? '#' + target.fragment : '') + | ||
(displaytext ? '|' + displaytext : '') + | ||
']]'; | ||
static parseTable(text) { | ||
text = text.trim(); | ||
const indexOfRawPipe = function (text) { | ||
// number of unclosed brackets | ||
let tlevel = 0, llevel = 0; | ||
let n = text.length; | ||
for (let i = 0; i < n; i++) { | ||
if (text[i] === '{' && text[i+1] === '{') { | ||
tlevel++; | ||
i++; | ||
} else if (text[i] === '[' && text[i+1] === '[') { | ||
llevel++; | ||
i++; | ||
} else if (text[i] === '}' && text[i+1] === '}') { | ||
tlevel--; | ||
i++; | ||
} else if (text[i] === ']' && text[i+1] === ']') { | ||
llevel--; | ||
i++; | ||
} else if (text[i] === '|' && tlevel === 0 && llevel === 0) { | ||
return i; | ||
} | ||
} | ||
}; | ||
if (!text.startsWith('{|') || !text.endsWith('|}')) { | ||
throw new Error('failed to parse table. Unexpected starting or ending'); | ||
} | ||
return '[[' + target + (displaytext ? '|' + displaytext : '') + ']]'; | ||
// remove front matter and final matter | ||
// including table attributes and caption, and unnecessary |- at the top | ||
text = text.replace(/^\{\|.*$((\n\|-)?\n\|\+.*$)?(\n\|-)?/m, '').replace(/^\|\}$/m, ''); | ||
let [header, ...rows] = text.split(/^\|-/m).map(r => r.trim()); | ||
// remove cell attributes, extracts data | ||
const extractData = (cell) => { | ||
return cell.slice(indexOfRawPipe(cell) + 1).trim(); | ||
}; | ||
// XXX: handle the case where there are is no header row | ||
let cols = header.split('\n').map(e => e.replace(/^!/, '')); | ||
if (cols.length === 1) { // non-multilined table? | ||
cols = cols[0].split('!!'); | ||
} | ||
cols = cols.map(extractData); | ||
let numcols = cols.length; | ||
let output = new Array(rows.length); | ||
rows.forEach((row, idx) => { | ||
let cells = row.split(/^\|/m).slice(1); // slice(1) removes the emptiness or the row styles if present | ||
if (cells.length === 1) { // non-multilined | ||
// cells are separated by || | ||
cells = cells[0].replace(/^\|/, '').split('||'); | ||
} | ||
cells = cells.map(extractData); | ||
if (cells.length !== numcols) { | ||
throw new Error(`failed to parse table: found ${cells.length} cells on row ${idx}, expected ${numcols}`); | ||
} | ||
output[idx] = {}; // output[idx] represents a row | ||
for (let i = 0; i < numcols; i++) { | ||
output[idx][cols[i]] = cells[i]; | ||
} | ||
}); | ||
return output; | ||
} | ||
/** | ||
* Get wikitext for a template usage | ||
* @param {string|bot.title} title | ||
* @param {Object} [options] - template parameters as object | ||
* Parse sections from wikitext | ||
* @param {string} text | ||
* @returns {{level: number, header: string, index: number, content: string}[]} array of | ||
* section objects. Each section object has the level, header, index (of beginning) and content. | ||
* Content *includes* the equal signs and the header. | ||
* The top is represented as level 1, with header `null`. | ||
*/ | ||
static template(title, options) { | ||
if (title instanceof bot.title) { | ||
if (title.namespace === 10) { | ||
title = title.getMainText(); // skip namespace name for templates | ||
} else if (title.namespace === 0) { | ||
title = ':' + title.toText(); // prefix colon for mainspace | ||
static parseSections(text) { | ||
const rgx = /^(=+)(.*?)\1/mg; | ||
let sections = [ | ||
{ | ||
level: 1, | ||
header: null, | ||
index: 0 | ||
} | ||
]; | ||
let match; | ||
while (match = rgx.exec(text)) { // eslint-disable-line no-cond-assign | ||
sections.push({ | ||
level: match[1].length, | ||
header: match[2].trim(), | ||
index: match.index | ||
}); | ||
} | ||
return '{{' + String(title) + | ||
Object.entries(options).map(([key, val]) => { | ||
return '|' + key + '=' + val; | ||
}).join('') + | ||
'}}'; | ||
let n = sections.length; | ||
for (let i = 0; i < n - 1; i++) { | ||
sections[i].content = text.slice(sections[i].index, sections[i + 1].index); | ||
} | ||
sections[n - 1].content = text.slice(sections[n - 1].index); | ||
return sections; | ||
} | ||
@@ -142,2 +285,4 @@ | ||
/**** Private members *****/ | ||
class Stack extends Array { | ||
@@ -148,4 +293,3 @@ top() { | ||
} | ||
var processLink = function(self, startIdx, endIdx) { | ||
var processLink = function (self, startIdx, endIdx) { | ||
var linktext = self.text.slice(startIdx, endIdx + 1); | ||
@@ -162,25 +306,24 @@ var [target, displaytext] = linktext.slice(2, -2).split('|'); | ||
} | ||
var linkobj = { | ||
wikitext: linktext, | ||
dsr: [startIdx, endIdx] // Note: data source ranges (dsr) are invalidated by any removeEntity() operation | ||
}; | ||
if (target[0] !== ':') { | ||
if (title.namespace === 6) { | ||
self.files.push(Object.assign({ | ||
self.files.push({ | ||
wikitext: linktext, | ||
target: title, | ||
props: linktext.slice(linktext.indexOf('|') + 1, -2) | ||
}, linkobj)); | ||
}); | ||
return; | ||
} else if (title.namespace === 14) { | ||
self.categories.push(Object.assign({ | ||
self.categories.push({ | ||
wikitext: linktext, | ||
target: title, | ||
sortkey: noSortkey ? '' : displaytext | ||
}, linkobj)); | ||
}); | ||
return; | ||
} | ||
} | ||
self.links.push(Object.assign({ | ||
target: title, | ||
self.links.push({ | ||
wikitext: linktext, | ||
target: title, | ||
displaytext: displaytext | ||
}, linkobj)); | ||
}); | ||
}; | ||
@@ -208,6 +351,4 @@ | ||
*/ | ||
constructor(wikitext, dsr) { | ||
constructor(wikitext) { | ||
this.wikitext = wikitext; | ||
// dsr stands for data source range, gives the starting and ending index in wikitext | ||
this.dsr = dsr, // an array of two numbers | ||
this.parameters = []; | ||
@@ -223,3 +364,3 @@ } | ||
getParam(paramName) { | ||
return this.parameters.find(function (p) { | ||
return this.parameters.find(p => { | ||
return p.name == paramName; | ||
@@ -238,19 +379,21 @@ }); | ||
// Copied from https://en.wikipedia.org/wiki/MediaWiki:Gadget-libExtraUtil.js | ||
// adapted by Evad37 from the original by written by me at | ||
// https://en.wikipedia.org/wiki/User:SD0001/parseAllTemplates.js (cc-by-sa-3.0/GFDL) | ||
// parseTemplates() and processTemplateText() are adapted from | ||
// https://en.wikipedia.org/wiki/MediaWiki:Gadget-libExtraUtil.js written by Evad37 | ||
// which was in turn adapted from https://en.wikipedia.org/wiki/User:SD0001/parseAllTemplates.js | ||
// written by me. (cc-by-sa/GFDL) | ||
/** | ||
* | ||
* @param {string} wikitext | ||
* @param {boolean|number} [recursive=false] - also parse templates within templates, | ||
* give a number to specify recursion depth. If given as `true`, infinite recursion | ||
* depth is assumed. | ||
* @param {number} [count] - stop parsing when this many templates have been found, | ||
* Recursive parsing does NOT work if count is specified. | ||
* @inheritdoc | ||
*/ | ||
var parseTemplates = function (wikitext, recursive, count) { | ||
const parseTemplates = function (wikitext, config) { | ||
config = config || { | ||
recursive: false, | ||
namePredicate: null, | ||
templatePredicate: null, | ||
count: null | ||
}; | ||
var result = []; | ||
const result = []; | ||
var n = wikitext.length; | ||
const n = wikitext.length; | ||
@@ -284,4 +427,7 @@ // number of unclosed braces | ||
var templateWikitext = wikitext.slice(startIdx, endIdx); // without braces | ||
result.push(processTemplateText(templateWikitext, [startIdx - 2, endIdx + 1])); | ||
if (count && result.length === count) { | ||
var processed = processTemplateText(templateWikitext, config.namePredicate, config.templatePredicate); | ||
if (processed) { | ||
result.push(processed); | ||
} | ||
if (config.count && result.length === config.count) { | ||
return result; | ||
@@ -321,11 +467,10 @@ } | ||
if (recursive && !count) { | ||
var subtemplates = result.map(function (template) { | ||
if (config.recursive) { | ||
var subtemplates = result.map(template => { | ||
return template.wikitext.slice(2, -2); | ||
}).filter(function (templateWikitext) { | ||
return /\{\{(?:.|\n)*\}\}/.test(templateWikitext); | ||
}).map(function (templateWikitext) { | ||
return parseTemplates(templateWikitext, recursive === true ? true : recursive - 1); | ||
}).filter(templateWikitext => { | ||
return /\{\{.*\}\}/s.test(templateWikitext); | ||
}).map(templateWikitext => { | ||
return parseTemplates(templateWikitext, config); | ||
}); | ||
return result.concat.apply(result, subtemplates); | ||
@@ -339,10 +484,9 @@ } | ||
/** | ||
* @param {string} text - template wikitext without braces, with the pipes in | ||
* nested templates replaced by \1 | ||
* @param {Number[]} [dsr] - data source range (optional) for the template object | ||
* Array of starting and ending indices of template in wikitext | ||
* @param {string} text - template wikitext without braces, with the pipes in | ||
* nested templates replaced by \x01 | ||
* @returns {Template} | ||
*/ | ||
var processTemplateText = function (text, dsr) { | ||
const processTemplateText = function (text, namePredicate, templatePredicate) { | ||
var template = new Template('{{' + text.replace(/\1/g, '|') + '}}', dsr); | ||
const template = new Template('{{' + text.replace(/\1/g, '|') + '}}'); | ||
@@ -355,3 +499,3 @@ // swap out pipe in links with \1 control character | ||
var chunks = text.split('|').map(function (chunk) { | ||
const [name, ...parameterChunks] = text.split('|').map(chunk => { | ||
// change '\1' control characters back to pipes | ||
@@ -361,10 +505,12 @@ return chunk.replace(/\1/g, '|'); | ||
template.setName(chunks[0]); | ||
template.setName(name); | ||
if (namePredicate && !namePredicate(template.name)) { | ||
return null; | ||
} | ||
var parameterChunks = chunks.slice(1); | ||
var unnamedIdx = 1; | ||
parameterChunks.forEach(function (chunk) { | ||
var indexOfEqualTo = chunk.indexOf('='); | ||
var indexOfOpenBraces = chunk.indexOf('{{'); | ||
var indexOfOpenBraces = chunk.indexOf('{{'); | ||
@@ -391,6 +537,10 @@ var isWithoutEquals = !chunk.includes('='); | ||
if (templatePredicate && !templatePredicate(template)) { | ||
return null; | ||
} | ||
return template; | ||
}; | ||
var strReplaceAt = function (string, index, char) { | ||
const strReplaceAt = function (string, index, char) { | ||
return string.slice(0, index) + char + string.slice(index + 1); | ||
@@ -402,2 +552,1 @@ }; | ||
}; | ||
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Network access
Supply chain riskThis module accesses the network.
Found 2 instances in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
157490
4087
6
2
+ Added form-data@^3.0.0
+ Added oauth-1.0a@^2.2.6
+ Added asynckit@0.4.0 (transitive)
+ Added combined-stream@1.0.8 (transitive)
+ Added delayed-stream@1.0.0 (transitive)
+ Added form-data@3.0.2 (transitive)
+ Added mime-db@1.52.0 (transitive)
+ Added mime-types@2.1.35 (transitive)
+ Added oauth-1.0a@2.2.6 (transitive)
- Removed is-promise@^4.0.0
- Removed is-promise@4.0.0 (transitive)