Socket
Socket
Sign inDemoInstall

wiktionary-translations

Package Overview
Dependencies
3
Maintainers
1
Versions
14
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.0.8 to 0.1.0

src/schematics/requests_schema.drawio

2

dist/wiktionary-translations.js

@@ -1,1 +0,1 @@

!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e(require("axios"),require("iso-639-1")):"function"==typeof define&&define.amd?define(["axios","iso-639-1"],e):"object"==typeof exports?exports.wiktionaryTransations=e(require("axios"),require("iso-639-1")):t.wiktionaryTransations=e(t.axios,t[void 0])}(this,(function(t,e){return(()=>{"use strict";var r={300:e=>{e.exports=t},95:t=>{t.exports=e}},i={};function n(t){var e=i[t];if(void 0!==e)return e.exports;var a=i[t]={exports:{}};return r[t](a,a.exports,n),a.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var r in e)n.o(e,r)&&!n.o(t,r)&&Object.defineProperty(t,r,{enumerable:!0,get:e[r]})},n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),n.r=t=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(t,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(t,"__esModule",{value:!0})};var a={};return(()=>{n.r(a),n.d(a,{getTranslations:()=>p});var t=n(300),e=n.n(t),r=n(95),i=n.n(r);const o=t=>`https://${t}.wiktionary.org/w/api.php?`,s={action:"query",format:"json",origin:"*"},l=(t,e)=>({...s,prop:"iwlinks",iwlimit:"max",iwprefix:e,titles:t}),u=(t,e=null)=>({...s,prop:"langlinks|links",lllimit:"max",pllimit:"max",lllang:e,titles:t}),c=async(t,r)=>{try{const i=await e().get(t,{params:r});if(!i||!i.data||!i.data.query)return!1;if(200===!i.status&&!i.data.query.pages)return!1;const[n]=Object.values(i.data.query.pages);return n}catch(t){console.log(t)}},p=async(t,e,r)=>{try{const n=["ae","lu","nd","nr","oj"];if(!i().validate(e)||n.includes(e))throw new Error(`Invalid source language code: "${e}"`);if(!i().validate(r)||n.includes(r))throw new Error(`Invalid target language code: "${r}"`);const a=o(e),s=/\/([^\/]+)\/?$/,u=t=>s.test(t["*"])?t["*"].match(s):t["*"],p=await c(a,l(t,r));if(!p)return!1;if(p.iwlinks)return p.iwlinks.map(u);const f=await c(a,l(t+"/translations",r));if(!f)return!1;if(f.iwlinks)return 
f.iwlinks.map(u);const g=await d(t,e,r);return!(!g||!g.length)&&g}catch(t){console.log(t)}},d=async(t,e,r)=>{try{const n=await c(o(e),u(t,r));if(!n)return!1;if(!n.langlinks||!n.links)return!1;const a=n.links.map((t=>t.title)).filter((e=>!e.includes(t))),l=await c(o(r),u(t));if(!l||!l.links)return!1;const p=l.links.map((t=>t.title)).filter((e=>!e.includes(t))),d=a.filter((t=>p.includes(t))),f=await Promise.all(d.map((async t=>{const n=await c(o(r),(t=>({...s,prop:"categories",titles:t}))(t));return!(!n||!n.categories)&&(!!((t,e,r)=>{const n=t.join(" ");return!(!(o=i().getNativeName(r),new RegExp(`:?${o}`,"gi")).test(n)||(a=e,new RegExp(`:${a}`,"gi")).test(n));var a,o})(n.categories.map((t=>t.title)),e,r)&&t)})));return f.filter((t=>t))}catch(t){console.log(t)}}})(),a})()}));
!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e(require("iso-639-1"),require("axios")):"function"==typeof define&&define.amd?define(["iso-639-1","axios"],e):"object"==typeof exports?exports.wiktionaryTransations=e(require("iso-639-1"),require("axios")):t.wiktionaryTransations=e(t[void 0],t.axios)}(this,(function(t,e){return(()=>{"use strict";var r={300:t=>{t.exports=e},95:e=>{e.exports=t}},s={};function a(t){var e=s[t];if(void 0!==e)return e.exports;var i=s[t]={exports:{}};return r[t](i,i.exports,a),i.exports}a.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return a.d(e,{a:e}),e},a.d=(t,e)=>{for(var r in e)a.o(e,r)&&!a.o(t,r)&&Object.defineProperty(t,r,{enumerable:!0,get:e[r]})},a.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),a.r=t=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(t,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(t,"__esModule",{value:!0})};var i={};return(()=>{a.r(i),a.d(i,{default:()=>n});var t=a(95),e=a.n(t),r=a(300),s=a.n(r);const n=class extends class{srcLang;trgtLang;#t;constructor(t,e){this.srcLang=t,this.trgtLang=e,this.#t={action:"query",format:"json",origin:"*"}}#e(t){return t.replace(t[0],t[0].toUpperCase())}#r(t){return`https://${t}.wiktionary.org/w/api.php?`}#s(t,e){return{...this.#t,prop:"iwlinks",iwlimit:"max",iwprefix:t,titles:`${e}|${e}/translations|${this.#e(e)}`}}#a(t,e){return{...this.#t,prop:"langlinks|links",lllimit:"max",pllimit:"max",lllang:t,titles:e}}#i(t){return{...this.#t,prop:"categories",titles:t}}async iwLinksDataSrc(t){try{const e=await s().get(this.#r(this.srcLang),{params:this.#s(this.trgtLang,t)});if(!e||!e.data||!e.data.query)throw new Error("Invalid request");if(200!==e.status||!e.data.query.pages)throw new Error("Request to the server unsuccessful");return Object.values(e.data.query.pages)}catch(t){console.log(t)}}async linksDataSrc(t){try{const e=await 
s().get(this.#r(this.srcLang),{params:this.#a(this.trgtLang,t)});if(!e||!e.data||!e.data.query)throw new Error("Invalid request");if(200!==e.status||!e.data.query.pages)throw new Error("Request to the server unsuccessful");return Object.values(e.data.query.pages)}catch(t){console.log(t)}}async linksDataTrgt(t){try{const e=await s().get(this.#r(this.trgtLang),{params:this.#a("no langlinks",t)});if(!e||!e.data||!e.data.query)throw new Error("Invalid request");if(200!==e.status||!e.data.query.pages)throw new Error("Request to the server unsuccessful");return Object.values(e.data.query.pages)}catch(t){console.log(t)}}async categoriesDataTrgt(t){try{const e=await s().get(this.#r(this.trgtLang),{params:this.#i(t.join("|"))});if(!e||!e.data||!e.data.query)throw new Error("Invaalid request");if(200!==e.status||!e.data.query.pages)throw new Error("Request to the server unsuccessful");return Object.values(e.data.query.pages).filter((t=>t.pageid))}catch(t){console.log(t)}}}{#n;constructor(t,e){super(t,e),this.#n=["ae","lu","nd","nr","oj"],this.validateCodes([t,e])}validateCodes(t){const[r,s]=t;try{t.forEach((t=>{if(!e().validate(t)||this.#n.includes(t))throw new Error(`Invalid code: ${t}`)}))}catch(t){console.log(t)}}async iwLinksSrc(t){try{const e=await this.iwLinksDataSrc(t);if(!e)throw new Error("iWLinks request was unsuccessful");const[r]=e.filter((t=>t.iwlinks));if(!r)return!1;const s=r.iwlinks.map((t=>t["*"]));return!!s.length&&s}catch(t){console.log(t)}}async linksSrc(t){try{const e=await this.linksDataSrc(t);if(!e)throw new Error("links request was unsuccessful");const[r]=e;return!(!r.langlinks||!r.links)&&r.links.map((t=>t.title)).filter((e=>e!==t))}catch(t){console.log(t)}}async linksTrgt(t){const e=await this.linksDataTrgt(t);if(!e)throw new Error("links request was unsuccessful");const[r]=e.filter((t=>t.links));if(!r)return!1;const s=r.links.map((t=>t.title)).filter((e=>e!==t));return!!s.length&&s}async categoriesParse(t){const e=await this.linksSrc(t),r=await 
this.linksTrgt(t);if(!e||!r)return!1;const s=e.filter((t=>r.includes(t))),a=(await this.categoriesDataTrgt(s)).filter((t=>t.categories)).map((t=>({title:t.title,categories:t.categories.map((t=>t.title))}))).filter((t=>this.#o(t.categories))).map((t=>t.title));return!!a.length&&a}async getTranslations(t){const e=t.toLowerCase(),r=await this.iwLinksSrc(e);if(r)return this.#l(r);const s=await this.categoriesParse(e);return!!s&&this.#l(s)}#o(t){const r=t.join(" ");return!(!(a=e().getNativeName(this.trgtLang),new RegExp(`:?${a}`,"gi")).test(r)||(s=this.srcLang,new RegExp(`:${s}`,"gi")).test(r));var s,a}#l(t){const e=/([^\/]+$)/;return t.map((t=>e.test(t)?t.match(e)[0]:t))}}})(),i})()}));
{
"name": "wiktionary-translations",
"version": "0.0.8",
"version": "0.1.0",
"description": "wiktionary scraper api",

@@ -5,0 +5,0 @@ "main": "./dist/wiktionary-translations.js",

@@ -23,2 +23,6 @@ # Wiktionary translations

### Recently changed
The module has been rewritten in an object-oriented paradigm to support better scalability. Be aware of the updated usage syntax.
### Plans for future updates

@@ -35,3 +39,3 @@

`npm i wiktionary-translations`
npm i wiktionary-translations

@@ -42,7 +46,7 @@ ### Importing

`const wt = require("wiktionary-translations");`
const WiktTransl = require("wiktionary-translations").default;
- for ES module
`import { getTranslations } from "wiktionary-translations";`
import WiktTransl from "wiktionary-translations";

@@ -53,11 +57,6 @@ ### Functions

- for Node.js
const dict = new WiktTransl("srcLangCode", "trgtLangCode");
const translations = await dict.getTranslations("title");
`const translations = await getTranslations(articleTitle, sourceLanguage, targetLanguage)`
- for ES module
`const translations = await wt.getTranslations(articleTitle, sourceLanguage, targetLanguage)`
- params {string}
- return {array}

@@ -1,95 +0,114 @@

import axios from "axios";
//ES6 imports
import ISO6391 from "iso-639-1";
import WiktionaryRequest from "./wiktionaryRequest.js";
const endpoint = (lang) => `https://${lang}.wiktionary.org/w/api.php?`;
const defaultConfig = { action: "query", format: "json", origin: "*" };
const propIwLinksQuery = (title, trgtLang) => ({
...defaultConfig,
prop: "iwlinks",
iwlimit: "max",
iwprefix: trgtLang,
titles: title,
});
const propLangLinksQuery = (title, trgtLang = null) => ({
...defaultConfig,
prop: "langlinks|links",
lllimit: "max",
pllimit: "max",
lllang: trgtLang,
titles: title,
});
const propCategoriesQuery = (title) => ({
...defaultConfig,
prop: "categories",
titles: title,
});
//Node.js imports
// const ISO6391 = require("iso-639-1");
// const { WiktionaryRequest } = require("./wiktionaryRequest.js");
const getData = async (endpoint, params) => {
try {
const response = await axios.get(endpoint, { params });
if (!response || !response.data || !response.data.query) return false;
if (!response.status === 200 && !response.data.query.pages) return false;
const [data] = Object.values(response.data.query.pages);
return data;
} catch (error) {
console.log(error);
class WiktTransl extends WiktionaryRequest {
#langsNotInWikt;
constructor(srcLang, trgtLang) {
super(srcLang, trgtLang);
this.#langsNotInWikt = ["ae", "lu", "nd", "nr", "oj"];
this.validateCodes([srcLang, trgtLang]);
}
};
const getTranslations = async (title, srcLang, trgtLang) => {
try {
//Langs not supported by Wiktionary
const notInWikt = ["ae", "lu", "nd", "nr", "oj"];
//Language codes validation
if (!ISO6391.validate(srcLang) || notInWikt.includes(srcLang))
throw new Error(`Invalid source language code: "${srcLang}"`);
if (!ISO6391.validate(trgtLang) || notInWikt.includes(trgtLang))
throw new Error(`Invalid target language code: "${trgtLang}"`);
const newEndpoint = endpoint(srcLang);
const noSlashRegex = /\/([^\/]+)\/?$/;
const parseTitle = (entry) =>
noSlashRegex.test(entry["*"])
? entry["*"].match(noSlashRegex)
: entry["*"];
const respIwLinks = await getData(
newEndpoint,
propIwLinksQuery(title, trgtLang)
);
//Guard clause
//Check if the iwlinks prop exists; if not, update the response variable with new data from another source
if (!respIwLinks) return false;
if (respIwLinks.iwlinks) {
return respIwLinks.iwlinks.map(parseTitle);
validateCodes(langCodes) {
const [srcLangCode, trgtLangCode] = langCodes;
try {
langCodes.forEach((code) => {
if (!ISO6391.validate(code) || this.#langsNotInWikt.includes(code))
throw new Error(`Invalid code: ${code}`);
});
} catch (error) {
console.log(error);
}
//Sometimes translations are on a separate page such as /translations
const respIwLinksTrans = await getData(
newEndpoint,
propIwLinksQuery(title + "/translations", trgtLang)
);
//Guard clause
if (!respIwLinksTrans) return false;
if (respIwLinksTrans.iwlinks) {
return respIwLinksTrans.iwlinks.map(parseTitle);
}
async iwLinksSrc(title) {
try {
const responseArray = await this.iwLinksDataSrc(title);
if (!responseArray) throw new Error("iWLinks request was unsuccessful");
//Filter array with condition that iwlinks property exists
const [filteredResponse] = responseArray.filter((entry) => entry.iwlinks);
if (!filteredResponse) return false;
const translations = filteredResponse.iwlinks.map((entry) => entry["*"]);
if (translations.length) return translations;
return false;
} catch (error) {
console.log(error);
}
//If nothing was fetched from IwLinks then get parsed translations from langLinks prop
const titlesLangLinks = await transLangLinks(title, srcLang, trgtLang);
if (titlesLangLinks && titlesLangLinks.length) {
return titlesLangLinks;
}
async linksSrc(title) {
try {
const responseArray = await this.linksDataSrc(title);
if (!responseArray) throw new Error("links request was unsuccessful");
const [response] = responseArray;
/*
With langlinks it is possible to check if the target language page with
the same title exists. The existence of this page is required for the algorithm
to work, thus if this page does not exist it's ok to terminate the whole process
at this point.
*/
if (!response.langlinks || !response.links) return false;
const srcLinks = response.links
.map((entry) => entry.title)
.filter((localTitle) => localTitle !== title);
return srcLinks;
} catch (error) {
console.log(error);
}
//If nothing was fetched return false
}
async linksTrgt(title) {
const responseArray = await this.linksDataTrgt(title);
if (!responseArray) throw new Error("links request was unsuccessful");
const [response] = responseArray.filter((entry) => entry.links);
if (!response) return false;
const links = response.links
.map((entry) => entry.title)
.filter((localTitle) => localTitle !== title);
if (links.length) return links;
return false;
} catch (error) {
console.log(error);
}
};
async categoriesParse(title) {
const linksFromSrc = await this.linksSrc(title);
const linksFromTrgt = await this.linksTrgt(title);
if (!linksFromSrc || !linksFromTrgt) return false;
//Filter same titles from both arrays (arrays intersection)
const sameTitles = linksFromSrc.filter((title) =>
linksFromTrgt.includes(title)
);
const categories = await this.categoriesDataTrgt(sameTitles);
//filter for responses only with categories parameter
const existingCategories = categories.filter((entry) => entry.categories);
const parsedCategories = existingCategories.map((entry) => ({
title: entry.title,
categories: entry.categories.map((entry) => entry.title),
}));
const filteredCategories = parsedCategories.filter((entry) =>
this.#checkCategories(entry.categories)
);
const parsedTitles = filteredCategories.map((entry) => entry.title);
if (parsedTitles.length) return parsedTitles;
return false;
}
async getTranslations(originalTile) {
const title = originalTile.toLowerCase();
//Request #1 - get translations from internal wiki links
const iwLinksResp = await this.iwLinksSrc(title);
if (iwLinksResp) return this.#parseTitles(iwLinksResp);
//If Request #1 was unsuccessful, continue with fetching from Requests #2, #3 and #4
const categoriesParseResp = await this.categoriesParse(title);
if (categoriesParseResp) return this.#parseTitles(categoriesParseResp);
// this return false signals that no translations were parsed
return false;
}
const transLangLinks = async (title, srcLang, trgtLang) => {
const parseCategories = (categories, srcLang, trgtLang) => {
const catString = categories.join(" ");
const nativeName = ISO6391.getNativeName(trgtLang);
#checkCategories(categories) {
const categoriesString = categories.join(" ");
const nativeName = ISO6391.getNativeName(this.trgtLang);
const codeRegex = (langCode) => new RegExp(`:${langCode}`, "gi");
const nameRegex = (langName) => new RegExp(`:?${langName}`, "gi");
if (
nameRegex(nativeName).test(catString) &&
!codeRegex(srcLang).test(catString)
nameRegex(nativeName).test(categoriesString) &&
!codeRegex(this.srcLang).test(categoriesString)
) {

@@ -100,54 +119,13 @@ return true;

}
};
try {
//Fetch LangLinks and Links from the page
const respAllLinks = await getData(
endpoint(srcLang),
propLangLinksQuery(title, trgtLang)
}
#parseTitles(titles) {
const regex = /([^\/]+$)/;
const titlesParsed = titles.map((title) =>
regex.test(title) ? title.match(regex)[0] : title
);
//Guard clauses
if (!respAllLinks) return false;
if (!respAllLinks.langlinks || !respAllLinks.links) return false;
//Map titles to array and filter out original title
const titlesSrcLinks = respAllLinks.links
.map((entry) => entry.title)
.filter((localTitle) => !localTitle.includes(title));
//Get all links from target language page
const respTrgtPageLinks = await getData(
endpoint(trgtLang),
propLangLinksQuery(title)
);
//Map titles and remove original title from array of target page titles
if (!respTrgtPageLinks || !respTrgtPageLinks.links) return false;
const titlesTrgtPageLinks = respTrgtPageLinks.links
.map((entry) => entry.title)
.filter((localTitle) => !localTitle.includes(title));
//Check for the intersection between both arrays to get a translation word we are looking for
const intersections = titlesSrcLinks.filter((localTitle) =>
titlesTrgtPageLinks.includes(localTitle)
);
//Check for categories for the rest of the words
const translations = await Promise.all(
intersections.map(async (localTitle) => {
const response = await getData(
endpoint(trgtLang),
propCategoriesQuery(localTitle)
);
if (response && response.categories) {
const categories = response.categories.map((entry) => entry.title);
if (parseCategories(categories, srcLang, trgtLang)) {
return localTitle;
} else return false;
} else return false;
})
);
const filteredTranslations = translations.filter(
(localTitle) => localTitle
);
return filteredTranslations;
} catch (error) {
console.log(error);
return titlesParsed;
}
};
}
export { getTranslations };
export default WiktTransl;
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc