wiktionary-translations
Advanced tools
Comparing version 0.0.8 to 0.1.0
@@ -1,1 +0,1 @@ | ||
!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e(require("axios"),require("iso-639-1")):"function"==typeof define&&define.amd?define(["axios","iso-639-1"],e):"object"==typeof exports?exports.wiktionaryTransations=e(require("axios"),require("iso-639-1")):t.wiktionaryTransations=e(t.axios,t[void 0])}(this,(function(t,e){return(()=>{"use strict";var r={300:e=>{e.exports=t},95:t=>{t.exports=e}},i={};function n(t){var e=i[t];if(void 0!==e)return e.exports;var a=i[t]={exports:{}};return r[t](a,a.exports,n),a.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var r in e)n.o(e,r)&&!n.o(t,r)&&Object.defineProperty(t,r,{enumerable:!0,get:e[r]})},n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),n.r=t=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(t,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(t,"__esModule",{value:!0})};var a={};return(()=>{n.r(a),n.d(a,{getTranslations:()=>p});var t=n(300),e=n.n(t),r=n(95),i=n.n(r);const o=t=>`https://${t}.wiktionary.org/w/api.php?`,s={action:"query",format:"json",origin:"*"},l=(t,e)=>({...s,prop:"iwlinks",iwlimit:"max",iwprefix:e,titles:t}),u=(t,e=null)=>({...s,prop:"langlinks|links",lllimit:"max",pllimit:"max",lllang:e,titles:t}),c=async(t,r)=>{try{const i=await e().get(t,{params:r});if(!i||!i.data||!i.data.query)return!1;if(200===!i.status&&!i.data.query.pages)return!1;const[n]=Object.values(i.data.query.pages);return n}catch(t){console.log(t)}},p=async(t,e,r)=>{try{const n=["ae","lu","nd","nr","oj"];if(!i().validate(e)||n.includes(e))throw new Error(`Invalid source language code: "${e}"`);if(!i().validate(r)||n.includes(r))throw new Error(`Invalid target language code: "${r}"`);const a=o(e),s=/\/([^\/]+)\/?$/,u=t=>s.test(t["*"])?t["*"].match(s):t["*"],p=await c(a,l(t,r));if(!p)return!1;if(p.iwlinks)return p.iwlinks.map(u);const f=await c(a,l(t+"/translations",r));if(!f)return!1;if(f.iwlinks)return 
f.iwlinks.map(u);const g=await d(t,e,r);return!(!g||!g.length)&&g}catch(t){console.log(t)}},d=async(t,e,r)=>{try{const n=await c(o(e),u(t,r));if(!n)return!1;if(!n.langlinks||!n.links)return!1;const a=n.links.map((t=>t.title)).filter((e=>!e.includes(t))),l=await c(o(r),u(t));if(!l||!l.links)return!1;const p=l.links.map((t=>t.title)).filter((e=>!e.includes(t))),d=a.filter((t=>p.includes(t))),f=await Promise.all(d.map((async t=>{const n=await c(o(r),(t=>({...s,prop:"categories",titles:t}))(t));return!(!n||!n.categories)&&(!!((t,e,r)=>{const n=t.join(" ");return!(!(o=i().getNativeName(r),new RegExp(`:?${o}`,"gi")).test(n)||(a=e,new RegExp(`:${a}`,"gi")).test(n));var a,o})(n.categories.map((t=>t.title)),e,r)&&t)})));return f.filter((t=>t))}catch(t){console.log(t)}}})(),a})()})); | ||
!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e(require("iso-639-1"),require("axios")):"function"==typeof define&&define.amd?define(["iso-639-1","axios"],e):"object"==typeof exports?exports.wiktionaryTransations=e(require("iso-639-1"),require("axios")):t.wiktionaryTransations=e(t[void 0],t.axios)}(this,(function(t,e){return(()=>{"use strict";var r={300:t=>{t.exports=e},95:e=>{e.exports=t}},s={};function a(t){var e=s[t];if(void 0!==e)return e.exports;var i=s[t]={exports:{}};return r[t](i,i.exports,a),i.exports}a.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return a.d(e,{a:e}),e},a.d=(t,e)=>{for(var r in e)a.o(e,r)&&!a.o(t,r)&&Object.defineProperty(t,r,{enumerable:!0,get:e[r]})},a.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),a.r=t=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(t,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(t,"__esModule",{value:!0})};var i={};return(()=>{a.r(i),a.d(i,{default:()=>n});var t=a(95),e=a.n(t),r=a(300),s=a.n(r);const n=class extends class{srcLang;trgtLang;#t;constructor(t,e){this.srcLang=t,this.trgtLang=e,this.#t={action:"query",format:"json",origin:"*"}}#e(t){return t.replace(t[0],t[0].toUpperCase())}#r(t){return`https://${t}.wiktionary.org/w/api.php?`}#s(t,e){return{...this.#t,prop:"iwlinks",iwlimit:"max",iwprefix:t,titles:`${e}|${e}/translations|${this.#e(e)}`}}#a(t,e){return{...this.#t,prop:"langlinks|links",lllimit:"max",pllimit:"max",lllang:t,titles:e}}#i(t){return{...this.#t,prop:"categories",titles:t}}async iwLinksDataSrc(t){try{const e=await s().get(this.#r(this.srcLang),{params:this.#s(this.trgtLang,t)});if(!e||!e.data||!e.data.query)throw new Error("Invalid request");if(200!==e.status||!e.data.query.pages)throw new Error("Request to the server unsuccessful");return Object.values(e.data.query.pages)}catch(t){console.log(t)}}async linksDataSrc(t){try{const e=await 
s().get(this.#r(this.srcLang),{params:this.#a(this.trgtLang,t)});if(!e||!e.data||!e.data.query)throw new Error("Invalid request");if(200!==e.status||!e.data.query.pages)throw new Error("Request to the server unsuccessful");return Object.values(e.data.query.pages)}catch(t){console.log(t)}}async linksDataTrgt(t){try{const e=await s().get(this.#r(this.trgtLang),{params:this.#a("no langlinks",t)});if(!e||!e.data||!e.data.query)throw new Error("Invalid request");if(200!==e.status||!e.data.query.pages)throw new Error("Request to the server unsuccessful");return Object.values(e.data.query.pages)}catch(t){console.log(t)}}async categoriesDataTrgt(t){try{const e=await s().get(this.#r(this.trgtLang),{params:this.#i(t.join("|"))});if(!e||!e.data||!e.data.query)throw new Error("Invaalid request");if(200!==e.status||!e.data.query.pages)throw new Error("Request to the server unsuccessful");return Object.values(e.data.query.pages).filter((t=>t.pageid))}catch(t){console.log(t)}}}{#n;constructor(t,e){super(t,e),this.#n=["ae","lu","nd","nr","oj"],this.validateCodes([t,e])}validateCodes(t){const[r,s]=t;try{t.forEach((t=>{if(!e().validate(t)||this.#n.includes(t))throw new Error(`Invalid code: ${t}`)}))}catch(t){console.log(t)}}async iwLinksSrc(t){try{const e=await this.iwLinksDataSrc(t);if(!e)throw new Error("iWLinks request was unsuccessful");const[r]=e.filter((t=>t.iwlinks));if(!r)return!1;const s=r.iwlinks.map((t=>t["*"]));return!!s.length&&s}catch(t){console.log(t)}}async linksSrc(t){try{const e=await this.linksDataSrc(t);if(!e)throw new Error("links request was unsuccessful");const[r]=e;return!(!r.langlinks||!r.links)&&r.links.map((t=>t.title)).filter((e=>e!==t))}catch(t){console.log(t)}}async linksTrgt(t){const e=await this.linksDataTrgt(t);if(!e)throw new Error("links request was unsuccessful");const[r]=e.filter((t=>t.links));if(!r)return!1;const s=r.links.map((t=>t.title)).filter((e=>e!==t));return!!s.length&&s}async categoriesParse(t){const e=await this.linksSrc(t),r=await 
this.linksTrgt(t);if(!e||!r)return!1;const s=e.filter((t=>r.includes(t))),a=(await this.categoriesDataTrgt(s)).filter((t=>t.categories)).map((t=>({title:t.title,categories:t.categories.map((t=>t.title))}))).filter((t=>this.#o(t.categories))).map((t=>t.title));return!!a.length&&a}async getTranslations(t){const e=t.toLowerCase(),r=await this.iwLinksSrc(e);if(r)return this.#l(r);const s=await this.categoriesParse(e);return!!s&&this.#l(s)}#o(t){const r=t.join(" ");return!(!(a=e().getNativeName(this.trgtLang),new RegExp(`:?${a}`,"gi")).test(r)||(s=this.srcLang,new RegExp(`:${s}`,"gi")).test(r));var s,a}#l(t){const e=/([^\/]+$)/;return t.map((t=>e.test(t)?t.match(e)[0]:t))}}})(),i})()})); |
{ | ||
"name": "wiktionary-translations", | ||
"version": "0.0.8", | ||
"version": "0.1.0", | ||
"description": "wiktionary scraper api", | ||
@@ -5,0 +5,0 @@ "main": "./dist/wiktionary-translations.js", |
@@ -23,2 +23,6 @@ # Wiktionary translations | ||
### Recently changed | ||
The module has been rewritten in an object-oriented style to support better scalability. Be aware of the updated usage syntax. | ||
### Plans for future updates | ||
@@ -35,3 +39,3 @@ | ||
`npm i wiktionary-translations` | ||
npm i wiktionary-translations | ||
@@ -42,7 +46,7 @@ ### Importing | ||
`const wt = require("wiktionary-translations");` | ||
const WiktTransl = require("wiktionary-translations").default; | ||
- for ES module | ||
`import { getTranslations } from "wiktionary-translations";` | ||
import WiktTransl from "wiktionary-translations"; | ||
@@ -53,11 +57,6 @@ ### Functions | ||
- for Node.js | ||
const dict = new WiktTransl("srcLangCode", "trgtLangCode"); | ||
const translations = await dict.getTranslations("title"); | ||
`const translations = await getTranslations(articleTitle, sourceLanguage, targetLanguage)` | ||
- for ES module | ||
`const translations = await wt.getTranslations(articleTitle, sourceLanguage, targetLanguage)` | ||
- params {string} | ||
- return {array} |
240
src/index.js
@@ -1,95 +0,114 @@ | ||
import axios from "axios"; | ||
//ES6 imports | ||
import ISO6391 from "iso-639-1"; | ||
import WiktionaryRequest from "./wiktionaryRequest.js"; | ||
const endpoint = (lang) => `https://${lang}.wiktionary.org/w/api.php?`; | ||
const defaultConfig = { action: "query", format: "json", origin: "*" }; | ||
const propIwLinksQuery = (title, trgtLang) => ({ | ||
...defaultConfig, | ||
prop: "iwlinks", | ||
iwlimit: "max", | ||
iwprefix: trgtLang, | ||
titles: title, | ||
}); | ||
const propLangLinksQuery = (title, trgtLang = null) => ({ | ||
...defaultConfig, | ||
prop: "langlinks|links", | ||
lllimit: "max", | ||
pllimit: "max", | ||
lllang: trgtLang, | ||
titles: title, | ||
}); | ||
const propCategoriesQuery = (title) => ({ | ||
...defaultConfig, | ||
prop: "categories", | ||
titles: title, | ||
}); | ||
//Node.js imports | ||
// const ISO6391 = require("iso-639-1"); | ||
// const { WiktionaryRequest } = require("./wiktionaryRequest.js"); | ||
const getData = async (endpoint, params) => { | ||
try { | ||
const response = await axios.get(endpoint, { params }); | ||
if (!response || !response.data || !response.data.query) return false; | ||
if (!response.status === 200 && !response.data.query.pages) return false; | ||
const [data] = Object.values(response.data.query.pages); | ||
return data; | ||
} catch (error) { | ||
console.log(error); | ||
class WiktTransl extends WiktionaryRequest { | ||
#langsNotInWikt; | ||
constructor(srcLang, trgtLang) { | ||
super(srcLang, trgtLang); | ||
this.#langsNotInWikt = ["ae", "lu", "nd", "nr", "oj"]; | ||
this.validateCodes([srcLang, trgtLang]); | ||
} | ||
}; | ||
const getTranslations = async (title, srcLang, trgtLang) => { | ||
try { | ||
//Langs not supported by Wiktionary | ||
const notInWikt = ["ae", "lu", "nd", "nr", "oj"]; | ||
//Language codes validation | ||
if (!ISO6391.validate(srcLang) || notInWikt.includes(srcLang)) | ||
throw new Error(`Invalid source language code: "${srcLang}"`); | ||
if (!ISO6391.validate(trgtLang) || notInWikt.includes(trgtLang)) | ||
throw new Error(`Invalid target language code: "${trgtLang}"`); | ||
const newEndpoint = endpoint(srcLang); | ||
const noSlashRegex = /\/([^\/]+)\/?$/; | ||
const parseTitle = (entry) => | ||
noSlashRegex.test(entry["*"]) | ||
? entry["*"].match(noSlashRegex) | ||
: entry["*"]; | ||
const respIwLinks = await getData( | ||
newEndpoint, | ||
propIwLinksQuery(title, trgtLang) | ||
); | ||
//Guard clause | ||
//Check if iwlinks prop exists and, if not, update the response variable with new data from another source | ||
if (!respIwLinks) return false; | ||
if (respIwLinks.iwlinks) { | ||
return respIwLinks.iwlinks.map(parseTitle); | ||
validateCodes(langCodes) { | ||
const [srcLangCode, trgtLangCode] = langCodes; | ||
try { | ||
langCodes.forEach((code) => { | ||
if (!ISO6391.validate(code) || this.#langsNotInWikt.includes(code)) | ||
throw new Error(`Invalid code: ${code}`); | ||
}); | ||
} catch (error) { | ||
console.log(error); | ||
} | ||
//Sometimes translations are on a separate page such as /translations | ||
const respIwLinksTrans = await getData( | ||
newEndpoint, | ||
propIwLinksQuery(title + "/translations", trgtLang) | ||
); | ||
//Guard clause | ||
if (!respIwLinksTrans) return false; | ||
if (respIwLinksTrans.iwlinks) { | ||
return respIwLinksTrans.iwlinks.map(parseTitle); | ||
} | ||
async iwLinksSrc(title) { | ||
try { | ||
const responseArray = await this.iwLinksDataSrc(title); | ||
if (!responseArray) throw new Error("iWLinks request was unsuccessful"); | ||
//Filter array with condition that iwlinks property exists | ||
const [filteredResponse] = responseArray.filter((entry) => entry.iwlinks); | ||
if (!filteredResponse) return false; | ||
const translations = filteredResponse.iwlinks.map((entry) => entry["*"]); | ||
if (translations.length) return translations; | ||
return false; | ||
} catch (error) { | ||
console.log(error); | ||
} | ||
//If nothing was fetched from IwLinks then get parsed translations from langLinks prop | ||
const titlesLangLinks = await transLangLinks(title, srcLang, trgtLang); | ||
if (titlesLangLinks && titlesLangLinks.length) { | ||
return titlesLangLinks; | ||
} | ||
async linksSrc(title) { | ||
try { | ||
const responseArray = await this.linksDataSrc(title); | ||
if (!responseArray) throw new Error("links request was unsuccessful"); | ||
const [response] = responseArray; | ||
/* | ||
With langlinks it is possible to check if the target language page with | ||
the same title exists. The existence of this page is required for the algorithm | ||
to work, thus if this page does not exist it's ok to terminate the whole process | ||
at this point. | ||
*/ | ||
if (!response.langlinks || !response.links) return false; | ||
const srcLinks = response.links | ||
.map((entry) => entry.title) | ||
.filter((localTitle) => localTitle !== title); | ||
return srcLinks; | ||
} catch (error) { | ||
console.log(error); | ||
} | ||
//If nothing was fetched return false | ||
} | ||
async linksTrgt(title) { | ||
const responseArray = await this.linksDataTrgt(title); | ||
if (!responseArray) throw new Error("links request was unsuccessful"); | ||
const [response] = responseArray.filter((entry) => entry.links); | ||
if (!response) return false; | ||
const links = response.links | ||
.map((entry) => entry.title) | ||
.filter((localTitle) => localTitle !== title); | ||
if (links.length) return links; | ||
return false; | ||
} catch (error) { | ||
console.log(error); | ||
} | ||
}; | ||
async categoriesParse(title) { | ||
const linksFromSrc = await this.linksSrc(title); | ||
const linksFromTrgt = await this.linksTrgt(title); | ||
if (!linksFromSrc || !linksFromTrgt) return false; | ||
//Filter same titles form both arrays (arrays intersection) | ||
const sameTitles = linksFromSrc.filter((title) => | ||
linksFromTrgt.includes(title) | ||
); | ||
const categories = await this.categoriesDataTrgt(sameTitles); | ||
//filter for responses only with categories parameter | ||
const existingCategories = categories.filter((entry) => entry.categories); | ||
const parsedCategories = existingCategories.map((entry) => ({ | ||
title: entry.title, | ||
categories: entry.categories.map((entry) => entry.title), | ||
})); | ||
const filteredCategories = parsedCategories.filter((entry) => | ||
this.#checkCategories(entry.categories) | ||
); | ||
const parsedTitles = filteredCategories.map((entry) => entry.title); | ||
if (parsedTitles.length) return parsedTitles; | ||
return false; | ||
} | ||
async getTranslations(originalTile) { | ||
const title = originalTile.toLowerCase(); | ||
//Request #1 - get translations from internal wiki links | ||
const iwLinksResp = await this.iwLinksSrc(title); | ||
if (iwLinksResp) return this.#parseTitles(iwLinksResp); | ||
//If Request #1 was unsucess continue with fetching from Requests #2, #3 and #4 | ||
const categoriesParseResp = await this.categoriesParse(title); | ||
if (categoriesParseResp) return this.#parseTitles(categoriesParseResp); | ||
// this return false is giving a information that no translations were parsed | ||
return false; | ||
} | ||
const transLangLinks = async (title, srcLang, trgtLang) => { | ||
const parseCategories = (categories, srcLang, trgtLang) => { | ||
const catString = categories.join(" "); | ||
const nativeName = ISO6391.getNativeName(trgtLang); | ||
#checkCategories(categories) { | ||
const categoriesString = categories.join(" "); | ||
const nativeName = ISO6391.getNativeName(this.trgtLang); | ||
const codeRegex = (langCode) => new RegExp(`:${langCode}`, "gi"); | ||
const nameRegex = (langName) => new RegExp(`:?${langName}`, "gi"); | ||
if ( | ||
nameRegex(nativeName).test(catString) && | ||
!codeRegex(srcLang).test(catString) | ||
nameRegex(nativeName).test(categoriesString) && | ||
!codeRegex(this.srcLang).test(categoriesString) | ||
) { | ||
@@ -100,54 +119,13 @@ return true; | ||
} | ||
}; | ||
try { | ||
//Fetch LangLinks and Links from the page | ||
const respAllLinks = await getData( | ||
endpoint(srcLang), | ||
propLangLinksQuery(title, trgtLang) | ||
} | ||
#parseTitles(titles) { | ||
const regex = /([^\/]+$)/; | ||
const titlesParsed = titles.map((title) => | ||
regex.test(title) ? title.match(regex)[0] : title | ||
); | ||
//Guard clauses | ||
if (!respAllLinks) return false; | ||
if (!respAllLinks.langlinks || !respAllLinks.links) return false; | ||
//Map titles to array and filter out original title | ||
const titlesSrcLinks = respAllLinks.links | ||
.map((entry) => entry.title) | ||
.filter((localTitle) => !localTitle.includes(title)); | ||
//Get all links from target language page | ||
const respTrgtPageLinks = await getData( | ||
endpoint(trgtLang), | ||
propLangLinksQuery(title) | ||
); | ||
//Map titles and remove original title from array of target page titles | ||
if (!respTrgtPageLinks || !respTrgtPageLinks.links) return false; | ||
const titlesTrgtPageLinks = respTrgtPageLinks.links | ||
.map((entry) => entry.title) | ||
.filter((localTitle) => !localTitle.includes(title)); | ||
//Check for the intersection between both arrays to get a translation word we are looking for | ||
const intersections = titlesSrcLinks.filter((localTitle) => | ||
titlesTrgtPageLinks.includes(localTitle) | ||
); | ||
//Check for categories for the rest of the words | ||
const translations = await Promise.all( | ||
intersections.map(async (localTitle) => { | ||
const response = await getData( | ||
endpoint(trgtLang), | ||
propCategoriesQuery(localTitle) | ||
); | ||
if (response && response.categories) { | ||
const categories = response.categories.map((entry) => entry.title); | ||
if (parseCategories(categories, srcLang, trgtLang)) { | ||
return localTitle; | ||
} else return false; | ||
} else return false; | ||
}) | ||
); | ||
const filteredTranslations = translations.filter( | ||
(localTitle) => localTitle | ||
); | ||
return filteredTranslations; | ||
} catch (error) { | ||
console.log(error); | ||
return titlesParsed; | ||
} | ||
}; | ||
} | ||
export { getTranslations }; | ||
export default WiktTransl; |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
50900
7
259
59
1