@a_kawashiro/jendeley
Comparing version 0.0.8 to 0.0.9
dist/gen.js
@@ -15,6 +15,5 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.registerNonBookPDF = exports.getDocIDFromTitle = exports.getTitleFromPath = exports.getDoiJSON = exports.getJson = exports.getDocIDFromTexts = exports.genDummyDB = exports.getDocID = exports.genDB = void 0; | ||
exports.registerWeb = exports.registerNonBookPDF = exports.getTitleFromPath = exports.getDoiJSON = exports.getJson = exports.genDummyDB = exports.genDB = void 0; | ||
const fs_1 = __importDefault(require("fs")); | ||
const path_1 = __importDefault(require("path")); | ||
const pdf_parse_1 = __importDefault(require("pdf-parse")); | ||
const node_isbn_1 = __importDefault(require("node-isbn")); | ||
@@ -25,2 +24,3 @@ const xml2js_1 = __importDefault(require("xml2js")); | ||
const constants_1 = require("./constants"); | ||
const docid_1 = require("./docid"); | ||
function walkPDFDFS(dir) { | ||
@@ -55,186 +55,2 @@ if (!fs_1.default.existsSync(dir)) { | ||
} | ||
function getDocIDFromTexts(texts) { | ||
const regexpDOI = new RegExp('(10[.][0-9]{2,}(?:[.][0-9]+)*/(?:(?![%"#? ])\\S)+)', "g"); | ||
const regexpArxivDOI = new RegExp("(arXiv:[0-9]{4}[.][0-9]{4,5})", "g"); | ||
const regexpISBN = new RegExp("(?:ISBN(?:-1[03])?:? )?(?=[0-9X]{10}|(?=(?:[0-9]+[- ]){3})[- 0-9X]{13}|97[89][0-9]{10}|(?=(?:[0-9]+[- ]){4})[- 0-9]{17})(?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]", "g"); | ||
let doi = null; | ||
let arxiv = null; | ||
for (const text of texts) { | ||
const foundDOI = [...text.matchAll(regexpDOI)]; | ||
for (const f of foundDOI) { | ||
let d = f[0]; | ||
if (d.charAt(d.length - 1) == ".") { | ||
d = d.substr(0, d.length - 1); | ||
} | ||
// Hack for POPL | ||
d = d.replace("10.1145/http://dx.doi.org/", ""); | ||
doi = d; | ||
break; | ||
} | ||
if (doi != null) | ||
break; | ||
const foundArxivDOI = [...text.matchAll(regexpArxivDOI)]; | ||
for (const f of foundArxivDOI) { | ||
const d = f[0]; | ||
arxiv = d.substring(6); | ||
break; | ||
} | ||
if (arxiv != null) | ||
break; | ||
} | ||
let isbn = null; | ||
for (const text of texts) { | ||
const foundISBN = [...text.matchAll(regexpISBN)]; | ||
for (const f of foundISBN) { | ||
let d = f[0]; | ||
let n = ""; | ||
for (const c of d) { | ||
if (("0" <= c && c <= "9") || c == "X") { | ||
n += c; | ||
} | ||
} | ||
if (n.length == 10) { | ||
const invalid = new RegExp("(0000000000)|(1111111111)|(2222222222)|(3333333333)|(4444444444)|(5555555555)|(6666666666)|(7777777777)|(8888888888)|(9999999999)", "g"); | ||
const foundInvalid = [...n.matchAll(invalid)]; | ||
if (foundInvalid.length != 0) { | ||
continue; | ||
} | ||
let cd = 0; | ||
for (let i = 0; i < 9; i++) { | ||
cd += (10 - i) * (n.charCodeAt(i) - "0".charCodeAt(0)); | ||
} | ||
cd = 11 - (cd % 11); | ||
const cd_c = cd == 10 ? "X" : String.fromCharCode("0".charCodeAt(0) + cd); | ||
if (cd_c == n[9]) { | ||
isbn = n; | ||
} | ||
} | ||
else if (n.length == 13 && | ||
(n.substring(0, 3) == "978" || n.substring(0, 3) == "979")) { | ||
let cd = 0; | ||
for (let i = 0; i < 12; i++) { | ||
if (i % 2 == 0) { | ||
cd += n.charCodeAt(i) - "0".charCodeAt(0); | ||
} | ||
else { | ||
cd += (n.charCodeAt(i) - "0".charCodeAt(0)) * 3; | ||
} | ||
} | ||
cd = 10 - (cd % 10); | ||
const cd_c = String.fromCharCode("0".charCodeAt(0) + cd); | ||
if (cd_c == n[12]) { | ||
isbn = n; | ||
} | ||
} | ||
break; | ||
} | ||
if (isbn != null) | ||
break; | ||
} | ||
return { doi: doi, isbn: isbn, arxiv: arxiv, path: null }; | ||
} | ||
exports.getDocIDFromTexts = getDocIDFromTexts; | ||
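The ISBN branch above accepts a candidate only when its check digit is consistent: ISBN-10 uses a weighted sum modulo 11 (with "X" standing for 10), ISBN-13 uses alternating 1/3 weights modulo 10. A minimal standalone sketch of those rules, separate from the package's code and slightly stricter about input shape:
// Sketch of the standard ISBN check-digit rules applied by getDocIDFromTexts.
function isValidIsbn10(n) {
  // n: nine digits followed by a digit or "X", e.g. "0262162091".
  if (!/^[0-9]{9}[0-9X]$/.test(n)) return false;
  let sum = 0;
  for (let i = 0; i < 9; i++) sum += (10 - i) * (n.charCodeAt(i) - 48);
  sum += n[9] === "X" ? 10 : n.charCodeAt(9) - 48;
  return sum % 11 === 0;
}
function isValidIsbn13(n) {
  // n: thirteen digits starting with 978 or 979, e.g. "9781467330763".
  if (!/^97[89][0-9]{10}$/.test(n)) return false;
  let sum = 0;
  for (let i = 0; i < 12; i++) sum += (n.charCodeAt(i) - 48) * (i % 2 === 0 ? 1 : 3);
  return (10 - (sum % 10)) % 10 === n.charCodeAt(12) - 48;
}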
function getDocIDFromUrl(url) { | ||
const regexpArxiv = new RegExp("https://arxiv[.]org/pdf/([0-9]{4}[.][0-9]{4,5})[.]pdf", "g"); | ||
const foundArxiv = [...url.matchAll(regexpArxiv)]; | ||
for (const f of foundArxiv) { | ||
return { doi: null, isbn: null, arxiv: f[1], path: null }; | ||
} | ||
return null; | ||
} | ||
function getDocIDManuallyWritten(pdf) { | ||
const regexpDOI1 = new RegExp("(doi_10_[0-9]{4}_[0-9]{4,}([_-][0-9()-]{6,})?)", "g"); | ||
const foundDOI1 = [...pdf.matchAll(regexpDOI1)]; | ||
for (const f of foundDOI1) { | ||
let d = f[0].substring(4); | ||
d = | ||
d.substring(0, 2) + | ||
"." + | ||
d.substring(3, 3 + 4) + | ||
"/" + | ||
d.substring(3 + 4 + 1); | ||
d = d.replaceAll("_", "."); | ||
return { doi: d, isbn: null, arxiv: null, path: null }; | ||
} | ||
const regexpDOI2 = new RegExp("(doi_10_[0-9]{4}_[A-Z]{1,3}[0-9]+[0-9X])", "g"); | ||
const foundDOI2 = [...pdf.matchAll(regexpDOI2)]; | ||
for (const f of foundDOI2) { | ||
let d = f[0].substring(4); | ||
d = | ||
d.substring(0, 2) + | ||
"." + | ||
d.substring(3, 3 + 4) + | ||
"/" + | ||
d.substring(3 + 4 + 1); | ||
d = d.replaceAll("_", "."); | ||
return { doi: d, isbn: null, arxiv: null, path: null }; | ||
} | ||
const regexpDOI3 = new RegExp("(doi_10_[0-9]{4}_[a-zA-z]+_[0-9]+_[0-9]+)", "g"); | ||
const foundDOI3 = [...pdf.matchAll(regexpDOI3)]; | ||
for (const f of foundDOI3) { | ||
let d = f[0].substring(4); | ||
d = | ||
d.substring(0, 2) + | ||
"." + | ||
d.substring(3, 3 + 4) + | ||
"/" + | ||
d.substring(3 + 4 + 1); | ||
d = d.replaceAll("_", "."); | ||
return { doi: d, isbn: null, arxiv: null, path: null }; | ||
} | ||
const regexpDOI4 = new RegExp("(doi_10_[0-9]{4}_[0-9X-]+_[0-9]{1,})", "g"); | ||
const foundDOI4 = [...pdf.matchAll(regexpDOI4)]; | ||
for (const f of foundDOI4) { | ||
let d = f[0].substring(4); | ||
d = | ||
d.substring(0, 2) + | ||
"." + | ||
d.substring(3, 3 + 4) + | ||
"/" + | ||
d.substring(3 + 4 + 1); | ||
return { doi: d, isbn: null, arxiv: null, path: null }; | ||
} | ||
const regexpDOI6 = new RegExp("(doi_10_[0-9]{4}_[a-zA-z]+-[0-9]+-[0-9]+)", "g"); | ||
const foundDOI6 = [...pdf.matchAll(regexpDOI6)]; | ||
for (const f of foundDOI6) { | ||
let d = f[0].substring(4); | ||
d = | ||
d.substring(0, 2) + | ||
"." + | ||
d.substring(3, 3 + 4) + | ||
"/" + | ||
d.substring(3 + 4 + 1); | ||
d = d.replaceAll("_", "."); | ||
return { doi: d, isbn: null, arxiv: null, path: null }; | ||
} | ||
const regexpDOI7 = new RegExp("(doi_10_[0-9]{4}_978-[0-9-]+)", "g"); | ||
const foundDOI7 = [...pdf.matchAll(regexpDOI7)]; | ||
for (const f of foundDOI7) { | ||
let d = f[0].substring(4); | ||
d = | ||
d.substring(0, 2) + | ||
"." + | ||
d.substring(3, 3 + 4) + | ||
"/" + | ||
d.substring(3 + 4 + 1); | ||
d = d.replaceAll("_", "."); | ||
return { doi: d, isbn: null, arxiv: null, path: null }; | ||
} | ||
const regexpISBN = new RegExp("(isbn_[0-9]{10,})", "g"); | ||
const foundISBN = [...pdf.matchAll(regexpISBN)]; | ||
for (const f of foundISBN) { | ||
let d = f[0].substring(5); | ||
return { doi: null, isbn: d, arxiv: null, path: null }; | ||
} | ||
if (path_1.default.basename(pdf, ".pdf").endsWith("no_id") || | ||
pdf.includes(constants_1.JENDELEY_NO_ID)) { | ||
return { | ||
doi: null, | ||
isbn: null, | ||
arxiv: null, | ||
path: pdf, | ||
}; | ||
} | ||
return null; | ||
} | ||
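getDocIDManuallyWritten lets a filename carry its own ID as an underscore-encoded suffix such as "doi_10_1145_3290364" or "isbn_9781467330763". A small illustration of the decoding those patterns perform (this helper is hypothetical, not part of the package):
// Decode an underscore-encoded DOI suffix back into a real DOI.
function decodeDoiSuffix(suffix) {
  let d = suffix.substring(4);      // drop the leading "doi_", keep "10_1145_3290364"
  d = d.substring(0, 2) + "." +     // "10."
      d.substring(3, 7) + "/" +     // registrant code, "1145/"
      d.substring(8);               // suffix, "3290364"
  return d.replaceAll("_", ".");    // any remaining "_" stands for "."
}
// decodeDoiSuffix("doi_10_1145_3290364") === "10.1145/3290364"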
function getTitleFromPath(pdf) { | ||
@@ -255,71 +71,2 @@ const basename = path_1.default.basename(pdf, ".pdf"); | ||
exports.getTitleFromPath = getTitleFromPath; | ||
function getDocIDFromTitle(pdf) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const title = getTitleFromPath(pdf); | ||
let { got } = yield import("got"); | ||
const URL = "https://api.crossref.org/v1/works?query.bibliographic=" + | ||
title.replaceAll(" ", "+"); | ||
const options = { headers: { Accept: "application/json" } }; | ||
try { | ||
const data = (yield got(URL, options).json()); | ||
const n_item = data["message"]["items"].length; | ||
for (let i = 0; i < n_item; i++) { | ||
const t = data["message"]["items"][i]["title"][0].toLowerCase(); | ||
if (title.toLowerCase() == t) { | ||
const doi = data["message"]["items"][i]["DOI"]; | ||
return { doi: doi, isbn: null, arxiv: null, path: null }; | ||
} | ||
} | ||
return null; | ||
} | ||
catch (_a) { | ||
logger_1.logger.warn("Failed to get information from doi: " + URL); | ||
return null; | ||
} | ||
}); | ||
} | ||
exports.getDocIDFromTitle = getDocIDFromTitle; | ||
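getDocIDFromTitle above queries Crossref's bibliographic search and keeps a DOI only when a returned title matches the PDF's title case-insensitively. A minimal sketch of the same lookup, assuming Node 18+ with a global fetch (the response fields mirror the ones read above):
// Hypothetical standalone helper; error handling omitted.
async function doiFromTitle(title) {
  const url = "https://api.crossref.org/v1/works?query.bibliographic=" +
    title.replaceAll(" ", "+");
  const res = await fetch(url, { headers: { Accept: "application/json" } });
  const data = await res.json();
  for (const item of data["message"]["items"]) {
    if (item["title"] && item["title"][0].toLowerCase() === title.toLowerCase()) {
      return item["DOI"]; // exact (case-insensitive) title match
    }
  }
  return null;
}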
function getDocID(pdf, papers_dir, is_book, download_url) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const pdf_fullpath = path_1.default.join(papers_dir, pdf); | ||
const manuallyWrittenDocID = getDocIDManuallyWritten(pdf); | ||
if (manuallyWrittenDocID != null) { | ||
return manuallyWrittenDocID; | ||
} | ||
if (download_url != null) { | ||
const docIDFromUrl = getDocIDFromUrl(download_url); | ||
if (docIDFromUrl != null) { | ||
return docIDFromUrl; | ||
} | ||
} | ||
// Titles of chapters are sometimes confusing such as "Reference". | ||
if (!is_book) { | ||
const docIDFromTitle = yield getDocIDFromTitle(pdf_fullpath); | ||
if (docIDFromTitle != null) { | ||
return docIDFromTitle; | ||
} | ||
} | ||
let dataBuffer = fs_1.default.readFileSync(pdf_fullpath); | ||
const texts = yield (0, pdf_parse_1.default)(dataBuffer) | ||
.then((data) => { | ||
// See https://www.npmjs.com/package/pdf-parse for usage | ||
return data.text.split(/\r?\n/); | ||
}) | ||
.catch((e) => { | ||
logger_1.logger.warn(e.message); | ||
return null; | ||
}); | ||
if (texts == null) { | ||
return { doi: null, isbn: null, arxiv: null, path: null }; | ||
} | ||
let id = getDocIDFromTexts(texts); | ||
if (is_book) { | ||
id.doi = null; | ||
id.arxiv = null; | ||
id.path = null; | ||
} | ||
return id; | ||
}); | ||
} | ||
exports.getDocID = getDocID; | ||
function getDoiJSON(doi) { | ||
@@ -400,3 +147,6 @@ return __awaiter(this, void 0, void 0, function* () { | ||
else { | ||
logger_1.logger.warn("Failed to get info of " + docID + " using arxiv " + path); | ||
logger_1.logger.warn("Failed to get information of " + | ||
JSON.stringify(docID) + | ||
" using arxiv " + | ||
path); | ||
} | ||
@@ -413,3 +163,6 @@ } | ||
else { | ||
logger_1.logger.warn("Failed to get info of " + docID + " using doi " + path); | ||
logger_1.logger.warn("Failed to get information of " + | ||
JSON.stringify(docID) + | ||
" using doi " + | ||
path); | ||
} | ||
@@ -426,3 +179,6 @@ } | ||
else { | ||
logger_1.logger.warn("Failed to get info of " + docID + " using isbn " + path); | ||
logger_1.logger.warn("Failed to get information of " + | ||
JSON.stringify(docID) + | ||
" using isbn " + | ||
path); | ||
} | ||
@@ -439,3 +195,10 @@ } | ||
if (json_r == null || db_id == null) { | ||
logger_1.logger.warn("Failed to get info of " + docID + path); | ||
logger_1.logger.warn("Failed to get information of " + | ||
JSON.stringify(docID) + | ||
" path = " + | ||
path + | ||
" json_r = " + | ||
JSON.stringify(json_r) + | ||
" db_id = " + | ||
JSON.stringify(db_id)); | ||
return null; | ||
@@ -450,3 +213,3 @@ } | ||
function isValidJsonEntry(json) { | ||
return json["title"] != null && json["path"] != null; | ||
return (json["title"] != null && (json["path"] != null || json["id_type"] == "url")); | ||
} | ||
@@ -467,2 +230,35 @@ function genDummyDB(output) { | ||
exports.genDummyDB = genDummyDB; | ||
function registerWeb(json_db, url, title, comments, tags) { | ||
logger_1.logger.info("url = " + | ||
url + | ||
" title = " + | ||
title + | ||
" tags = " + | ||
tags + | ||
" comments = " + | ||
comments); | ||
const docID = { | ||
url: url, | ||
doi: null, | ||
isbn: null, | ||
path: null, | ||
arxiv: null, | ||
}; | ||
logger_1.logger.info("docID = " + JSON.stringify(docID)); | ||
let json = new Object(); | ||
json["title"] = title; | ||
json["comments"] = comments; | ||
json["tags"] = tags; | ||
json["id_type"] = "url"; | ||
if (isValidJsonEntry(json)) { | ||
json_db["url_" + url] = json; | ||
logger_1.logger.info("Register url_" + url); | ||
return json_db; | ||
} | ||
else { | ||
logger_1.logger.warn("Failed to register url_" + url); | ||
return json_db; | ||
} | ||
} | ||
exports.registerWeb = registerWeb; | ||
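registerWeb is new in 0.0.9: a web page is stored without a path, keyed by "url_" + url, and isValidJsonEntry now accepts such entries because their id_type is "url". The resulting database entry looks roughly like this (the URL and text below are made up for illustration):
// Shape of the entry written by registerWeb; values are illustrative.
const entry = {
  title: "Some article",
  comments: "read later",
  tags: ["web"],
  id_type: "url",
};
// Stored as: json_db["url_https://example.com/some-article"] = entry;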
function registerNonBookPDF(papers_dir, pdf, json_db, comments, tags, rename_using_title, download_url) { | ||
@@ -478,4 +274,10 @@ return __awaiter(this, void 0, void 0, function* () { | ||
comments); | ||
const docID = yield getDocID(pdf, papers_dir, false, download_url); | ||
const docID = yield (0, docid_1.getDocID)(pdf, papers_dir, false, download_url); | ||
logger_1.logger.info("docID = " + JSON.stringify(docID)); | ||
if (docID.arxiv == null && | ||
docID.doi == null && | ||
docID.isbn == null && | ||
docID.path == null) { | ||
logger_1.logger.fatal("Cannot get docID of " + pdf); | ||
} | ||
const t = yield getJson(docID, pdf); | ||
@@ -514,2 +316,5 @@ if (t == null) { | ||
} | ||
else { | ||
return json_db; | ||
} | ||
}); | ||
@@ -522,2 +327,3 @@ } | ||
for (let i = 0; i < book_dirs.length; i++) { | ||
// TODO: OS dependency | ||
if (book_dirs[i].slice(-1) != "/") { | ||
@@ -566,3 +372,3 @@ book_dirs[i] = book_dirs[i] + "/"; | ||
is_book = true; | ||
const docID = yield getDocID(p, papers_dir, true, null); | ||
const docID = yield (0, docid_1.getDocID)(p, papers_dir, true, null); | ||
const t = yield getJson(docID, p); | ||
@@ -597,4 +403,3 @@ if (t != null && t[0]["id_type"] == "isbn") { | ||
let chapter_info = JSON.parse(JSON.stringify(book_info)); | ||
chapter_info["title"] = | ||
chapter_info["title"] + "/" + path_1.default.basename(chapter_path, ".pdf"); | ||
chapter_info["title"] = path_1.default.join(chapter_info["title"], path_1.default.basename(chapter_path, ".pdf")); | ||
chapter_info["id_type"] = "book"; | ||
@@ -617,3 +422,4 @@ chapter_info["path"] = chapter_path; | ||
if (not_registerd_pdfs.length > 0) { | ||
logger_1.logger.warn(not_registerd_pdfs.length, " files are not registered. Please edit edit_and_run.sh and run it so that we can find IDs."); | ||
logger_1.logger.warn(not_registerd_pdfs.length + | ||
" files are not registered. Please edit edit_and_run.sh and run it so that we can find IDs."); | ||
const register_shellscript = "edit_and_run.sh"; | ||
@@ -620,0 +426,0 @@ let commands = ""; |
@@ -13,5 +13,6 @@ "use strict"; | ||
const gen_1 = require("./gen"); | ||
const docid_1 = require("./docid"); | ||
test.skip("DOI from title", () => __awaiter(void 0, void 0, void 0, function* () { | ||
const pdf = "/papers/[Thomas van Noort, Peter Achten, Rinus Plasmeijer]Ad-hoc Polymorphism and Dynamic Typing in a Statically Typed Functional Language.pdf"; | ||
const docID = yield (0, gen_1.getDocIDFromTitle)(pdf); | ||
const docID = yield (0, docid_1.getDocIDFromTitle)(pdf, "hoge"); | ||
expect(docID === null || docID === void 0 ? void 0 : docID.doi).toBe("10.1145/1863495.1863505"); | ||
@@ -26,3 +27,3 @@ })); | ||
const pdf = "DistributedLearning/[Jeffrey Dean] Large Scale Distributed Deep Networks [jendeley no id].pdf"; | ||
const docID = yield (0, gen_1.getDocID)(pdf, "/hoge/", false, null); | ||
const docID = yield (0, docid_1.getDocID)(pdf, "/hoge/", false, null); | ||
const t = yield (0, gen_1.getJson)(docID, pdf); | ||
@@ -39,7 +40,7 @@ expect(t).toBeTruthy(); | ||
test("ISBN from text", () => __awaiter(void 0, void 0, void 0, function* () { | ||
const docID1 = yield (0, gen_1.getDocIDFromTexts)([ | ||
const docID1 = yield (0, docid_1.getDocIDFromTexts)([ | ||
"ISBN 0-262-16209-1 (hc. : alk. paper)", | ||
]); | ||
expect(docID1.isbn).toBe("0262162091"); | ||
const docID2 = yield (0, gen_1.getDocIDFromTexts)([ | ||
const docID2 = yield (0, docid_1.getDocIDFromTexts)([ | ||
"ISBN: 0262162091 (hc. : alk. paper)", | ||
@@ -51,3 +52,3 @@ ]); | ||
const pdf4 = "hoge_no_id.pdf"; | ||
const docID4 = yield (0, gen_1.getDocID)(pdf4, "/hoge/", false, null); | ||
const docID4 = yield (0, docid_1.getDocID)(pdf4, "/hoge/", false, null); | ||
expect(docID4).toStrictEqual({ | ||
@@ -63,3 +64,3 @@ arxiv: null, | ||
const url = "https://arxiv.org/pdf/2212.07677.pdf"; | ||
const docID = yield (0, gen_1.getDocID)(pdf, "/hoge/", false, url); | ||
const docID = yield (0, docid_1.getDocID)(pdf, "/hoge/", false, url); | ||
expect(docID).toStrictEqual({ | ||
@@ -74,3 +75,3 @@ arxiv: "2212.07677", | ||
const pdf5 = "hoge_isbn_9781467330763.pdf"; | ||
const docID5 = yield (0, gen_1.getDocID)(pdf5, "/hoge/", false, null); | ||
const docID5 = yield (0, docid_1.getDocID)(pdf5, "/hoge/", false, null); | ||
expect(docID5).toStrictEqual({ | ||
@@ -85,3 +86,3 @@ arxiv: null, | ||
const pdf6 = "hoge_doi_10_1145_3290364.pdf"; | ||
const docID6 = yield (0, gen_1.getDocID)(pdf6, "/hoge/", false, null); | ||
const docID6 = yield (0, docid_1.getDocID)(pdf6, "/hoge/", false, null); | ||
expect(docID6).toStrictEqual({ | ||
@@ -94,3 +95,3 @@ arxiv: null, | ||
const pdf7 = "A Dependently Typed Assembly Language_doi_10_1145_507635_507657.pdf"; | ||
const docID7 = yield (0, gen_1.getDocID)(pdf7, "/hoge/", false, null); | ||
const docID7 = yield (0, docid_1.getDocID)(pdf7, "/hoge/", false, null); | ||
expect(docID7).toStrictEqual({ | ||
@@ -105,3 +106,3 @@ arxiv: null, | ||
const pdf2 = "DependentType/[EDWIN BRADY] Idris, a General Purpose Dependently Typed Programming Language- Design and Implementation_doi_10_1017_S095679681300018X.pdf"; | ||
const docID2 = yield (0, gen_1.getDocID)(pdf2, "/hoge/", false, null); | ||
const docID2 = yield (0, docid_1.getDocID)(pdf2, "/hoge/", false, null); | ||
expect(docID2).toStrictEqual({ | ||
@@ -114,3 +115,3 @@ arxiv: null, | ||
const pdf4 = "MemoryModel/[Scott Owens, Susmit Sarkar, Peter Sewell] A Better x86 Memory Model x86-TSO_doi_10_1007_978-3-642-03359-9_27.pdf"; | ||
const docID4 = yield (0, gen_1.getDocID)(pdf4, "/hoge/", false, null); | ||
const docID4 = yield (0, docid_1.getDocID)(pdf4, "/hoge/", false, null); | ||
expect(docID4).toStrictEqual({ | ||
@@ -123,3 +124,3 @@ arxiv: null, | ||
const pdf5 = "Riffle An Efficient Communication System with Strong Anonymity_doi_10_1515_popets-2016-0008.pdf"; | ||
const docID5 = yield (0, gen_1.getDocID)(pdf5, "/hoge/", false, null); | ||
const docID5 = yield (0, docid_1.getDocID)(pdf5, "/hoge/", false, null); | ||
expect(docID5).toStrictEqual({ | ||
@@ -132,3 +133,3 @@ arxiv: null, | ||
const pdf7 = "[Peter Dybjer] Inductive families_doi_10_1007_BF01211308.pdf"; | ||
const docID7 = yield (0, gen_1.getDocID)(pdf7, "/hoge/", false, null); | ||
const docID7 = yield (0, docid_1.getDocID)(pdf7, "/hoge/", false, null); | ||
expect(docID7).toStrictEqual({ | ||
@@ -141,3 +142,3 @@ arxiv: null, | ||
const pdf9 = "[Henk Barendregt] Lambda Calculus with Types_doi_10_1017_CBO9781139032636.pdf"; | ||
const docID9 = yield (0, gen_1.getDocID)(pdf9, "/hoge/", false, null); | ||
const docID9 = yield (0, docid_1.getDocID)(pdf9, "/hoge/", false, null); | ||
expect(docID9).toStrictEqual({ | ||
@@ -152,3 +153,3 @@ arxiv: null, | ||
const pdf1 = "Call-by-name, call-by-value and the λ-calculus_doi_10_1016_0304-3975(75)90017-1.pdf"; | ||
const docID1 = yield (0, gen_1.getDocID)(pdf1, "/hoge/", false, null); | ||
const docID1 = yield (0, docid_1.getDocID)(pdf1, "/hoge/", false, null); | ||
expect(docID1).toStrictEqual({ | ||
@@ -161,3 +162,3 @@ arxiv: null, | ||
const pdf3 = "Emerging-MPEG-Standards-for-Point-Cloud-Compression_doi_10_1109_JETCAS_2018_2885981.pdf"; | ||
const docID3 = yield (0, gen_1.getDocID)(pdf3, "/hoge/", false, null); | ||
const docID3 = yield (0, docid_1.getDocID)(pdf3, "/hoge/", false, null); | ||
expect(docID3).toStrictEqual({ | ||
@@ -170,3 +171,3 @@ arxiv: null, | ||
const pdf10 = "[John C. Reynolds] Separation Logic A Logic for Shared Mutable Data Structures_doi_10_1109_LICS_2002_1029817.pdf"; | ||
const docID10 = yield (0, gen_1.getDocID)(pdf10, "/hoge/", false, null); | ||
const docID10 = yield (0, docid_1.getDocID)(pdf10, "/hoge/", false, null); | ||
expect(docID10).toStrictEqual({ | ||
@@ -181,3 +182,3 @@ arxiv: null, | ||
const pdf6 = "MultistageProgramming/[Oleg Kiselyov] The Design and Implementation of BER MetaOCaml_doi_10_1007_978-3-319-07151-0_6.pdf"; | ||
const docID6 = yield (0, gen_1.getDocID)(pdf6, "/hoge/", false, null); | ||
const docID6 = yield (0, docid_1.getDocID)(pdf6, "/hoge/", false, null); | ||
expect(docID6).toStrictEqual({ | ||
@@ -190,3 +191,3 @@ arxiv: null, | ||
const pdf11 = "[Paul Blain Levy] Call By Push Value_doi_10_1007_978-94-007-0954-6.pdf"; | ||
const docID11 = yield (0, gen_1.getDocID)(pdf11, "/hoge/", false, null); | ||
const docID11 = yield (0, docid_1.getDocID)(pdf11, "/hoge/", false, null); | ||
expect(docID11).toStrictEqual({ | ||
@@ -201,3 +202,3 @@ arxiv: null, | ||
const pdf8 = "lonelyplanet-china-15-full-book.pdf"; | ||
const docID8 = yield (0, gen_1.getDocID)(pdf8, "/hoge/", false, null); | ||
const docID8 = yield (0, docid_1.getDocID)(pdf8, "/hoge/", false, null); | ||
expect(docID8).toStrictEqual({ | ||
@@ -204,0 +205,0 @@ arxiv: null, |
@@ -30,10 +30,6 @@ #!/usr/bin/env node | ||
: options._optionValues.book_dirs; | ||
// TODO: Get OS independent path delimiter. | ||
const pd = options._optionValues.papers_dir.slice(-1) == "/" | ||
? options._optionValues.papers_dir | ||
: options._optionValues.papers_dir + "/"; | ||
const db_name = options._optionValues.db_name == undefined | ||
? "jendeley_db.json" | ||
: options._optionValues.db_name; | ||
(0, gen_1.genDB)(pd, book_dirs_str, db_name); | ||
(0, gen_1.genDB)(options._optionValues.papers_dir, book_dirs_str, db_name); | ||
}); | ||
@@ -40,0 +36,0 @@ program |
@@ -16,4 +16,2 @@ "use strict"; | ||
exports.startServer = void 0; | ||
const base_64_1 = __importDefault(require("base-64")); | ||
const url_1 = __importDefault(require("url")); | ||
const path_1 = __importDefault(require("path")); | ||
@@ -25,137 +23,4 @@ const cors_1 = __importDefault(require("cors")); | ||
const body_parser_1 = __importDefault(require("body-parser")); | ||
const https_1 = __importDefault(require("https")); | ||
const gen_1 = require("./gen"); | ||
const logger_1 = require("./logger"); | ||
const constants_1 = require("./constants"); | ||
function checkEntry(entry) { | ||
console.assert(entry.title != null && entry.path != null, "id = ", entry.id, "entry = ", JSON.stringify(entry, null, 2)); | ||
} | ||
function getEntry(id, json) { | ||
console.assert(json[id] != null, "json[" + id + "] != null"); | ||
if (json[id]["id_type"] == "isbn" || json[id]["id_type"] == "book") { | ||
const title = json[id]["title"]; | ||
const path = json[id]["path"]; | ||
let authors = []; | ||
if (json[id]["authors"] != null) { | ||
authors = json[id]["authors"]; | ||
} | ||
let year = null; | ||
if (json[id]["publishedDate"] != null && | ||
!isNaN(parseInt(json[id]["publishedDate"].substr(0, 4)))) { | ||
year = parseInt(json[id]["publishedDate"].substr(0, 4)); | ||
} | ||
let publisher = ""; | ||
if (json[id]["publisher"] != null) { | ||
publisher = json[id]["publisher"]; | ||
} | ||
const tags = json[id]["tags"] != undefined ? json[id]["tags"] : []; | ||
const comments = json[id]["comments"] != undefined ? json[id]["comments"] : []; | ||
const abstract = ""; | ||
const e = { | ||
id: id, | ||
title: title, | ||
authors: authors, | ||
tags: tags, | ||
comments: comments, | ||
abstract: abstract, | ||
path: path, | ||
year: year, | ||
publisher: publisher, | ||
}; | ||
checkEntry(e); | ||
return e; | ||
} | ||
else if (json[id]["id_type"] == "doi") { | ||
const title = json[id]["title"]; | ||
const path = json[id]["path"]; | ||
let authors = []; | ||
if (json[id]["author"] != undefined) { | ||
for (let i = 0; i < json[id]["author"].length; i++) { | ||
authors.push(json[id]["author"][i]["given"] + " " + json[id]["author"][i]["family"]); | ||
} | ||
} | ||
let year = null; | ||
if (json[id]["published-print"] != null) { | ||
year = json[id]["published-print"]["date-parts"][0][0]; | ||
} | ||
else if (json[id]["created"] != null) { | ||
year = json[id]["created"]["date-parts"][0][0]; | ||
} | ||
const publisher = json[id]["event"] != null ? json[id]["event"] : ""; | ||
const abstract = json[id]["abstract"] != null ? json[id]["abstract"] : ""; | ||
const tags = json[id]["tags"] != undefined ? json[id]["tags"] : []; | ||
const comments = json[id]["comments"] != undefined ? json[id]["comments"] : []; | ||
const e = { | ||
id: id, | ||
title: title, | ||
authors: authors, | ||
tags: tags, | ||
comments: comments, | ||
abstract: abstract, | ||
path: path, | ||
year: year, | ||
publisher: publisher, | ||
}; | ||
checkEntry(e); | ||
return e; | ||
} | ||
else if (json[id]["id_type"] == "arxiv") { | ||
const title = json[id]["title"]; | ||
const path = json[id]["path"]; | ||
let authors = []; | ||
if (json[id]["author"].length != undefined) { | ||
for (let i = 0; i < json[id]["author"].length; i++) { | ||
authors.push(json[id]["author"][i]["name"]); | ||
} | ||
} | ||
else { | ||
authors.push(json[id]["author"]["name"]); | ||
} | ||
let year = null; | ||
if (json[id]["published"] != null && | ||
!isNaN(parseInt(json[id]["published"].substr(0, 4)))) { | ||
year = parseInt(json[id]["published"].substr(0, 4)); | ||
} | ||
const publisher = json[id]["event"] != null ? json[id]["event"] : ""; | ||
const abstract = json[id]["summary"] != null ? json[id]["summary"] : ""; | ||
const tags = json[id]["tags"] != undefined ? json[id]["tags"] : []; | ||
const comments = json[id]["comments"] != undefined ? json[id]["comments"] : []; | ||
const e = { | ||
id: id, | ||
title: title, | ||
authors: authors, | ||
tags: tags, | ||
abstract: abstract, | ||
comments: comments, | ||
path: path, | ||
year: year, | ||
publisher: publisher, | ||
}; | ||
checkEntry(e); | ||
return e; | ||
} | ||
else { | ||
const title = json[id]["title"]; | ||
const path = json[id]["path"]; | ||
const tags = json[id]["tags"] != undefined ? json[id]["tags"] : []; | ||
const comments = json[id]["comments"] != undefined ? json[id]["comments"] : []; | ||
const authors = []; | ||
const abstract = json[id]["abstract"] != null ? json[id]["abstract"] : ""; | ||
const year = null; | ||
const publisher = ""; | ||
const e = { | ||
id: id, | ||
title: title, | ||
authors: authors, | ||
tags: tags, | ||
abstract: abstract, | ||
comments: comments, | ||
path: path, | ||
year: year, | ||
publisher: publisher, | ||
}; | ||
checkEntry(e); | ||
return e; | ||
} | ||
} | ||
const api_1 = require("./api"); | ||
function startServer(db_path) { | ||
@@ -172,137 +37,19 @@ if (fs_1.default.existsSync(db_path)) { | ||
app.get("/api/get_db", (request, response) => { | ||
logger_1.logger.info("Get a get_db request" + request.url); | ||
const json = JSON.parse(fs_1.default.readFileSync(db_path).toString()); | ||
let db_response = []; | ||
for (const id of Object.keys(json)) { | ||
if (json[id] == null) | ||
continue; | ||
const e = getEntry(id, json); | ||
db_response.push(e); | ||
} | ||
response.writeHead(200, { | ||
"Content-Type": "application/json", | ||
"Access-Control-Allow-Origin": "*", | ||
"Access-Control-Allow-Methods": "GET,PUT,POST,DELETE", | ||
}); | ||
response.end(JSON.stringify(db_response)); | ||
logger_1.logger.info("Sent a response from get_db"); | ||
(0, api_1.get_db)(request, response, db_path); | ||
}); | ||
app.get("/api/get_pdf", (request, response) => { | ||
logger_1.logger.info("Get a get_pdf request", request.url); | ||
const params = url_1.default.parse(request.url, true).query; | ||
const pdf_path = unescape(base_64_1.default.decode(params.file)); | ||
const pdf = fs_1.default.readFileSync(path_1.default.join(path_1.default.dirname(db_path), pdf_path)); | ||
response.writeHead(200, { | ||
"Content-Type": "application/pdf", | ||
"Access-Control-Allow-Origin": "*", | ||
"Access-Control-Allow-Methods": "GET,PUT,POST,DELETE", | ||
}); | ||
response.end(pdf); | ||
logger_1.logger.info("Sent a response from get_pdf"); | ||
(0, api_1.get_pdf)(request, response, db_path); | ||
}); | ||
let jsonParser = body_parser_1.default.json(); | ||
app.put("/api/add_from_url", jsonParser, (httpRequest, response) => __awaiter(this, void 0, void 0, function* () { | ||
// TODO: Handle RequestGetFromURL.isbn/doi/comments/tags | ||
const req = httpRequest.body; | ||
logger_1.logger.info("Get a add_from_url request url = " + | ||
httpRequest.url + | ||
" req = " + | ||
JSON.stringify(req)); | ||
const filename = "[jendeley download " + Date.now().toString() + "].pdf"; | ||
const download = (uri, filename) => { | ||
const options = { | ||
headers: { | ||
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0", | ||
}, | ||
}; | ||
return new Promise((resolve, reject) => https_1.default | ||
.request(uri, options, (res) => { | ||
res | ||
.pipe(fs_1.default.createWriteStream(filename)) | ||
.on("close", resolve) | ||
.on("error", reject); | ||
}) | ||
.end()); | ||
}; | ||
yield download(req.url, path_1.default.join(path_1.default.dirname(db_path), filename)); | ||
let json = JSON.parse(fs_1.default.readFileSync(db_path).toString()); | ||
const date = new Date(); | ||
const date_tag = date.toISOString().split("T")[0]; | ||
const tags = req.tags; | ||
tags.push(date_tag); | ||
json = yield (0, gen_1.registerNonBookPDF)(path_1.default.dirname(db_path), filename, json, req.comments, tags, true, req.url); | ||
fs_1.default.writeFileSync(db_path, JSON.stringify(json)); | ||
response.writeHead(200, { | ||
"Content-Type": "application/json", | ||
"Access-Control-Allow-Origin": "*", | ||
"Access-Control-Allow-Methods": "GET,PUT,POST,DELETE", | ||
}); | ||
response.end(); | ||
logger_1.logger.info("Sent a response from add_from_url"); | ||
app.put("/api/add_pdf_from_url", jsonParser, (httpRequest, response) => __awaiter(this, void 0, void 0, function* () { | ||
(0, api_1.add_pdf_from_url)(httpRequest, response, db_path); | ||
})); | ||
app.put("/api/add_web_from_url", jsonParser, (httpRequest, response) => __awaiter(this, void 0, void 0, function* () { | ||
(0, api_1.add_web_from_url)(httpRequest, response, db_path); | ||
})); | ||
app.put("/api/update_entry", jsonParser, (request, response) => { | ||
logger_1.logger.info("Get a update_entry request url = " + request.url); | ||
const entry_o = request.body; | ||
// TODO: Is there any more sophisticated way to check user defined type? | ||
if (entry_o["id"] != undefined && | ||
entry_o["tags"] != undefined && | ||
entry_o["comments"] != undefined) { | ||
const entry = entry_o; | ||
let json = JSON.parse(fs_1.default.readFileSync(db_path).toString()); | ||
if (json[entry.id] != undefined) { | ||
logger_1.logger.info("Update DB with entry = " + JSON.stringify(entry)); | ||
json[entry.id]["tags"] = entry.tags; | ||
json[entry.id]["comments"] = entry.comments; | ||
} | ||
fs_1.default.writeFileSync(db_path, JSON.stringify(json)); | ||
} | ||
else { | ||
logger_1.logger.warn("Object from the client is not legitimated. entry_o = " + | ||
JSON.stringify(entry_o)); | ||
} | ||
response.writeHead(200, { | ||
"Content-Type": "application/json", | ||
"Access-Control-Allow-Origin": "*", | ||
"Access-Control-Allow-Methods": "GET,PUT,POST,DELETE", | ||
}); | ||
response.end(); | ||
logger_1.logger.info("Sent a response from update_entry"); | ||
(0, api_1.update_entry)(request, response, db_path); | ||
}); | ||
app.delete("/api/delete_entry", jsonParser, (request, response) => { | ||
logger_1.logger.info("Get a delete_entry request url = " + request.url); | ||
const entry_o = request.body; | ||
if (entry_o["id"] != undefined) { | ||
const entry = entry_o; | ||
let json = JSON.parse(fs_1.default.readFileSync(db_path).toString()); | ||
if (json[entry.id] != undefined && | ||
json[entry.id]["path"] != undefined) { | ||
logger_1.logger.info("Delete " + json[entry.id]["path"]); | ||
const old_filename = path_1.default.join(path_1.default.dirname(db_path), json[entry.id]["path"]); | ||
const dir = path_1.default.dirname(old_filename); | ||
const new_filename = path_1.default.join(dir, path_1.default.basename(old_filename, ".pdf") + | ||
" " + | ||
constants_1.JENDELEY_NO_TRACK + | ||
".pdf"); | ||
if (!fs_1.default.existsSync(old_filename)) { | ||
logger_1.logger.info("Rename " + old_filename + " to " + new_filename); | ||
fs_1.default.renameSync(old_filename, new_filename); | ||
} | ||
else { | ||
logger_1.logger.warn("Failed to rename " + old_filename + " to " + new_filename); | ||
} | ||
delete json[entry.id]; | ||
} | ||
fs_1.default.writeFileSync(db_path, JSON.stringify(json)); | ||
} | ||
else { | ||
logger_1.logger.warn("Object from the client is not legitimated. entry_o = " + | ||
JSON.stringify(entry_o)); | ||
} | ||
response.writeHead(200, { | ||
"Content-Type": "application/json", | ||
"Access-Control-Allow-Origin": "*", | ||
"Access-Control-Allow-Methods": "GET,PUT,POST,DELETE", | ||
}); | ||
response.end(); | ||
logger_1.logger.info("Sent a response from delete_entry"); | ||
(0, api_1.delete_entry)(request, response, db_path); | ||
}); | ||
@@ -309,0 +56,0 @@ app.listen(port, () => { |
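In 0.0.9 the Express routes above delegate to handlers in ./api (get_db, get_pdf, add_pdf_from_url, add_web_from_url, update_entry, delete_entry) instead of being implemented inline. Below is a hypothetical client call for the new add_web_from_url route; the port and the request-body field names are assumptions (the handler lives in ./api, which this diff does not show) and merely mirror registerWeb's parameters:
// Run inside an async function on Node 18+ (global fetch).
const PORT = 5000; // placeholder, not taken from this diff
await fetch(`http://localhost:${PORT}/api/add_web_from_url`, {
  method: "PUT",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    url: "https://example.com/some-article", // hypothetical
    title: "Some article",
    comments: "",
    tags: [],
  }),
});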
@@ -6,3 +6,3 @@ { | ||
}, | ||
"version": "0.0.8", | ||
"version": "0.0.9", | ||
"description": "", | ||
@@ -40,2 +40,3 @@ "main": "index.js", | ||
"@types/base-64": "^1.0.0", | ||
"@types/express": "^4.17.15", | ||
"@types/jest": "^29.2.3", | ||
@@ -53,2 +54,3 @@ "@types/node": "^18.11.9", | ||
"body-parser": "^1.20.1", | ||
"cheerio": "^1.0.0-rc.12", | ||
"commander": "^9.4.1", | ||
@@ -62,2 +64,3 @@ "cors": "^2.8.5", | ||
"pdf-parse": "^1.1.1", | ||
"pdf.js-extract": "^0.2.1", | ||
"pino": "^8.7.0", | ||
@@ -64,0 +67,0 @@ "pino-pretty": "^9.1.1", |
Sorry, the diffs of 5 files are not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Unidentified License
License (Experimental): Something that seems like a license was found, but its contents could not be matched with a known license.
Found 1 instance in 1 package
+ Added cheerio@^1.0.0-rc.12
+ Added pdf.js-extract@^0.2.1
+ Added boolbase@1.0.0 (transitive)
+ Added cheerio@1.0.0 (transitive)
+ Added cheerio-select@2.1.0 (transitive)
+ Added css-select@5.1.0 (transitive)
+ Added css-what@6.1.0 (transitive)
+ Added dommatrix@0.0.24 (transitive)
+ Added encoding-sniffer@0.2.0 (transitive)
+ Added htmlparser2@9.1.0 (transitive)
+ Added iconv-lite@0.6.3 (transitive)
+ Added nanoid@3.3.7 (transitive)
+ Added nth-check@2.1.1 (transitive)
+ Added parse5@7.2.1 (transitive)
+ Added parse5-htmlparser2-tree-adapter@7.1.0 (transitive)
+ Added parse5-parser-stream@7.1.2 (transitive)
+ Added pdf.js-extract@0.2.1 (transitive)
+ Added undici@6.21.0 (transitive)
+ Added web-streams-polyfill@3.2.0 (transitive)
+ Added whatwg-encoding@3.1.1 (transitive)
+ Added whatwg-mimetype@4.0.0 (transitive)
- Removed nanoid@3.3.8 (transitive)