@a_kawashiro/jendeley
Advanced tools
Comparing version 2.4.3 to 2.5.0
@@ -348,3 +348,3 @@ "use strict"; | ||
exports.getTitleFromUrl = getTitleFromUrl; | ||
async function addWebFromUrl(httpRequest, response, dbPath) { | ||
async function addWebFromUrl(httpRequest, response, dbPath, experimentalUseOllamaServer) { | ||
const req = httpRequest.body; | ||
@@ -361,6 +361,6 @@ logger_1.logger.info("Get a add_web_from_url request url = " + | ||
const titleOrError = await getTitleFromUrl(req.url); | ||
if (titleOrError._tag === "left") { | ||
if ((0, either_1.isLeft)(titleOrError)) { | ||
const r = { | ||
isSucceeded: false, | ||
message: titleOrError.left, | ||
message: (0, either_1.getLeft)(titleOrError), | ||
}; | ||
@@ -371,3 +371,3 @@ response.status(500).json(r); | ||
else { | ||
title = titleOrError.right; | ||
title = (0, either_1.getRight)(titleOrError); | ||
} | ||
@@ -382,3 +382,3 @@ } | ||
tags.push(date_tag); | ||
const newDBOrError = await (0, gen_1.registerWeb)(jsonDB, req.url, title, req.comments, tags); | ||
const newDBOrError = await (0, gen_1.registerWeb)(jsonDB, req.url, title, req.comments, tags, experimentalUseOllamaServer); | ||
if (newDBOrError._tag === "right") { | ||
@@ -385,0 +385,0 @@ (0, load_db_1.saveDB)(newDBOrError.right, dbPath); |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.MAX_ENTRIES_IN_GET_API = exports.JENDELEY_DIR = exports.JENDELEY_VERSION = exports.ARXIV_API_URL = exports.ID_TYPE_URL = exports.ID_TYPE_META = exports.ID_TYPE_BOOK = exports.ID_TYPE_PATH = exports.ID_TYPE_ISBN = exports.ID_TYPE_DOI = exports.ID_TYPE_ARXIV = exports.ENTRY_DATA_FROM_ARXIV = exports.ENTRY_TITLE = exports.ENTRY_TEXT = exports.ENTRY_COMMENTS = exports.ENTRY_TAGS = exports.ENTRY_URL = exports.ENTRY_PATH = exports.ENTRY_AUTHORS = exports.ENTRY_ID_TYPE = exports.DB_META_KEY = exports.JENDELEY_NO_ID = exports.JENDELEY_NO_TRACK = exports.TITLE_EDITABLE_ID_TYPES = exports.AUTHORES_EDITABLE_ID_TYPES = void 0; | ||
const JENDELEY_VERSION = "2.4.3"; | ||
exports.OLLAMA_SERVER = exports.MAX_ENTRIES_IN_GET_API = exports.JENDELEY_DIR = exports.JENDELEY_VERSION = exports.ARXIV_API_URL = exports.ID_TYPE_URL = exports.ID_TYPE_META = exports.ID_TYPE_BOOK = exports.ID_TYPE_PATH = exports.ID_TYPE_ISBN = exports.ID_TYPE_DOI = exports.ID_TYPE_ARXIV = exports.ENTRY_DATA_FROM_ARXIV = exports.ENTRY_TITLE = exports.ENTRY_TEXT = exports.ENTRY_COMMENTS = exports.ENTRY_TAGS = exports.ENTRY_URL = exports.ENTRY_PATH = exports.ENTRY_AUTHORS = exports.ENTRY_ID_TYPE = exports.DB_META_KEY = exports.JENDELEY_NO_ID = exports.JENDELEY_NO_TRACK = exports.TITLE_EDITABLE_ID_TYPES = exports.AUTHORES_EDITABLE_ID_TYPES = void 0; | ||
const JENDELEY_VERSION = "2.5.0"; | ||
exports.JENDELEY_VERSION = JENDELEY_VERSION; | ||
@@ -54,2 +54,4 @@ const JENDELEY_NO_TRACK = "[jendeley no track]"; | ||
exports.MAX_ENTRIES_IN_GET_API = MAX_ENTRIES_IN_GET_API; | ||
const OLLAMA_SERVER = "http://localhost:11434/"; | ||
exports.OLLAMA_SERVER = OLLAMA_SERVER; | ||
//# sourceMappingURL=constants.js.map |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.isRight = exports.genLeft = exports.genRight = void 0; | ||
exports.getRight = exports.getLeft = exports.isLeft = exports.isRight = exports.genLeft = exports.genRight = void 0; | ||
function genRight(r) { | ||
@@ -16,2 +16,24 @@ return { _tag: "right", right: r }; | ||
exports.isRight = isRight; | ||
/**
 * Reports whether an Either value carries the "left" tag.
 *
 * @param {{_tag: string}} e - Either-like object to inspect.
 * @returns {boolean} `true` when `e._tag` is exactly "left", otherwise `false`.
 */
function isLeft(e) {
    const { _tag: tag } = e;
    return tag === "left";
}
exports.isLeft = isLeft; | ||
/**
 * Extracts the payload of a left-tagged Either.
 *
 * @param {{_tag: string, left?: *, right?: *}} e - Either value to unwrap.
 * @returns {*} The value stored in `e.left`.
 * @throws {Error} When `e` is not tagged "left"; the message contains a
 *     serialized form of `e` followed by " is not a left".
 */
function getLeft(e) {
    if (e._tag === "left") {
        return e.left;
    }
    // Plain string concatenation renders an object as "[object Object]",
    // which hides what was actually passed in. Serialize it instead.
    let shown;
    try {
        shown = JSON.stringify(e);
    }
    catch (_err) {
        // JSON.stringify throws on cyclic structures and BigInt payloads;
        // fall back to the default string conversion in that case.
        shown = String(e);
    }
    throw new Error(shown + " is not a left");
}
exports.getLeft = getLeft; | ||
/**
 * Extracts the payload of a right-tagged Either.
 *
 * @param {{_tag: string, left?: *, right?: *}} e - Either value to unwrap.
 * @returns {*} The value stored in `e.right`.
 * @throws {Error} When `e` is not tagged "right"; the message contains a
 *     serialized form of `e` followed by " is not a right".
 */
function getRight(e) {
    if (e._tag === "right") {
        return e.right;
    }
    // Plain string concatenation renders an object as "[object Object]",
    // which hides what was actually passed in. Serialize it instead.
    let shown;
    try {
        shown = JSON.stringify(e);
    }
    catch (_err) {
        // JSON.stringify throws on cyclic structures and BigInt payloads;
        // fall back to the default string conversion in that case.
        shown = String(e);
    }
    throw new Error(shown + " is not a right");
}
exports.getRight = getRight; | ||
//# sourceMappingURL=either.js.map |
@@ -16,2 +16,3 @@ "use strict"; | ||
const logger_1 = require("./logger"); | ||
const tag_generate_1 = require("./tag_generate"); | ||
const constants_1 = require("./constants"); | ||
@@ -298,3 +299,17 @@ const docid_1 = require("./docid"); | ||
exports.genDummyDB = genDummyDB; | ||
async function registerWeb(jsonDB, url, title, comments, tags) { | ||
/**
 * Collects every distinct tag used by the non-meta entries of a database.
 *
 * Tags are returned in order of first appearance while walking
 * `Object.keys(jsonDB)`. Entries whose `idType` is "meta" are ignored, and
 * entries without a `tags` array are skipped instead of throwing.
 *
 * @param {Object<string, {idType: string, tags?: Array}>} jsonDB - Database
 *     object keyed by entry id.
 * @returns {Array} Distinct tags found across the database.
 */
function getTagCandiates(jsonDB) {
    // A Set keeps insertion order and dedupes in O(1) per tag, replacing the
    // repeated linear `includes` scans over the growing result array.
    const seen = new Set();
    for (const id of Object.keys(jsonDB)) {
        const e = jsonDB[id];
        if (e.idType === "meta" || !Array.isArray(e.tags)) {
            continue;
        }
        for (const t of e.tags) {
            seen.add(t);
        }
    }
    return [...seen];
}
async function registerWeb(jsonDB, url, title, comments, tags, experimentalUseOllamaServer) { | ||
logger_1.logger.info("url = " + | ||
@@ -312,5 +327,20 @@ url + | ||
const res = await (0, node_fetch_1.default)(new node_fetch_2.Request(url, options)); | ||
const html = res.text(); | ||
const html = await res.text(); | ||
logger_1.logger.debug("Fetched from " + url + ":\n" + html); | ||
const { convert } = require("html-to-text"); | ||
const text = convert(html, {}); | ||
logger_1.logger.info("experimentalUseOllamaServer = ", experimentalUseOllamaServer); | ||
if (experimentalUseOllamaServer) { | ||
logger_1.logger.info("Use ollama server to generate tags."); | ||
const tag_candidates = getTagCandiates(jsonDB); | ||
const generated_tags = await (0, tag_generate_1.genTags)(constants_1.OLLAMA_SERVER, title, text, tag_candidates); | ||
if (generated_tags._tag === "left") { | ||
return (0, either_1.genLeft)(generated_tags.left); | ||
} | ||
else { | ||
for (const t of generated_tags.right) { | ||
tags.push(t); | ||
} | ||
} | ||
} | ||
let json = { | ||
@@ -317,0 +347,0 @@ title: title, |
@@ -61,5 +61,6 @@ #!/usr/bin/env node | ||
.option("--port <port>", "Use if the default port 5000 is used.", "5000") | ||
.option("--experimental_use_ollama_server", "Use ollama server to generate tags. This is an experimental feature.") | ||
.action((cmd, options) => { | ||
const port_n = parseInt(options._optionValues.port, 10); | ||
(0, server_1.startServer)((0, path_util_1.pathStrToDirs)(path_1.default.resolve(options._optionValues.db)), options._optionValues.allow_cors, port_n); | ||
(0, server_1.startServer)((0, path_util_1.pathStrToDirs)(path_1.default.resolve(options._optionValues.db)), options._optionValues.allow_cors, port_n, options._optionValues.experimental_use_ollama_server); | ||
}); | ||
@@ -66,0 +67,0 @@ program |
@@ -17,4 +17,17 @@ "use strict"; | ||
const constants_1 = require("./constants"); | ||
function startServer(dbPath, allowCors, port) { | ||
const tag_generate_1 = require("./tag_generate"); | ||
const constants_2 = require("./constants"); | ||
function startServer(dbPath, allowCors, port, experimentalUseOllamaServer) { | ||
logger_1.logger.info("startServer version: " + constants_1.JENDELEY_VERSION); | ||
(0, tag_generate_1.checkOllamaServer)(constants_2.OLLAMA_SERVER).then((result) => { | ||
if (result) { | ||
logger_1.logger.info("Ollama server is available. " + constants_2.OLLAMA_SERVER); | ||
} | ||
else { | ||
logger_1.logger.fatal("Ollama server is not available. " + constants_2.OLLAMA_SERVER); | ||
logger_1.logger.fatal("Please check https://github.com/akawashiro/jendeley/blob/main/README.md to launch ollama server."); | ||
logger_1.logger.fatal("Or just wait for a while to launch ollama server if you already ran ./run_ollama.sh"); | ||
process.exit(1); | ||
} | ||
}); | ||
if (fs_1.default.existsSync((0, path_util_1.concatDirs)(dbPath))) { | ||
@@ -51,3 +64,3 @@ { | ||
app.put("/api/add_web_from_url", jsonParser, async (httpRequest, response) => { | ||
(0, api_1.addWebFromUrl)(httpRequest, response, dbPath); | ||
(0, api_1.addWebFromUrl)(httpRequest, response, dbPath, experimentalUseOllamaServer); | ||
}); | ||
@@ -54,0 +67,0 @@ app.put("/api/update_entry", jsonParser, (request, response) => { |
@@ -6,3 +6,3 @@ { | ||
}, | ||
"version": "2.4.3", | ||
"version": "2.5.0", | ||
"description": "", | ||
@@ -16,5 +16,6 @@ "main": "index.js", | ||
"test": "NODE_OPTIONS=--experimental-vm-modules jest --silent=false --verbose false", | ||
"test_tag": "NODE_OPTIONS=--experimental-vm-modules jest --silent=false src/tag_generate.test.ts", | ||
"scan_test_pdfs": "npm run build && node --require source-map-support/register dist/index.js scan --papers_dir test_pdfs --book_dirs test_pdfs/dummyTapl", | ||
"profile_scan_test_pdfs": "npm run build && node --require source-map-support/register --prof dist/index.js scan --papers_dir test_pdfs", | ||
"scan_test_pdfs_and_launch": "npm run build && node --require source-map-support/register dist/index.js scan --papers_dir test_pdfs --book_dirs test_pdfs/dummyTapl && node --require source-map-support/register dist/index.js launch --db test_pdfs/jendeley_db.json --port 5001 --allow_cors", | ||
"scan_test_pdfs_and_launch": "npm run build && node --require source-map-support/register dist/index.js scan --papers_dir test_pdfs --book_dirs test_pdfs/dummyTapl && node --require source-map-support/register dist/index.js launch --db test_pdfs/jendeley_db.json --port 5001 --allow_cors --experimental_use_ollama_server", | ||
"format": "prettier --write src", | ||
@@ -45,2 +46,3 @@ "lint": "prettier --check src" | ||
"@types/node": "^20.10.4", | ||
"@types/node-fetch": "^2.6.11", | ||
"@types/node-isbn": "^1.6.4", | ||
@@ -47,0 +49,0 @@ "@types/pdf-parse": "^1.1.4", |
199
README.md
# jendeley | ||
`jendeley` is a JSON-based document organizing software. | ||
- `jendeley` is JSON-based. You can see and edit your database easily. | ||
- `jendeley` works locally. Your important database is owned only by you. Not cloud. | ||
- `jendeley` is browser based. You can run it anywhere node.js runs. | ||
- `jendeley` is JSON-based. You can see and edit your database quickly. | ||
- `jendeley` works locally. Your important database is owned only by you. No cloud. | ||
- `jendeley` is browser-based. You can run it anywhere node.js runs. | ||
## Table of Contents | ||
<!-- toc --> | ||
- [Why jendeley?](#why-jendeley) | ||
- [Quickstart](#quickstart) | ||
- [Install](#install) | ||
- [Generate JSON database file](#generate-json-database-file) | ||
* [Recommended filename style](#recommended-filename-style) | ||
* [When `jendeley` fails to scan your PDFs](#when-jendeley-fails-to-scan-your-pdfs) | ||
- [Launch the service daemon and open the web user interface](#launch-the-service-daemon-and-open-the-web-user-interface) | ||
* [Launch `jendeley` automatically](#launch-jendeley-automatically) | ||
+ [Linux](#linux) | ||
+ [Windows](#windows) | ||
- [Use the web user interface](#use-the-web-user-interface) | ||
* [Add a webpage](#add-a-webpage) | ||
* [Add a PDF file on the Web](#add-a-pdf-file-on-the-web) | ||
* [Upload a PDF file](#upload-a-pdf-file) | ||
* [Use tags and comments](#use-tags-and-comments) | ||
- [Advanced topics](#advanced-topics) | ||
* [Check and edit your database (advanced)](#check-and-edit-your-database-advanced) | ||
* [Check source code (advanced)](#check-source-code-advanced) | ||
* [Use LLM (Large Language Model) to generate tags (advanced)](#use-llm-large-language-model-to-generate-tags-advanced) | ||
- [Contact me](#contact-me) | ||
- [Support me](#support-me) | ||
<!-- tocstop --> | ||
## Why jendeley? | ||
As programmers, we need various documents in different formats, such as recent machine learning papers, classic compiler books, CPU and accelerator specification documents, programming language documents, and informative blog articles. To efficiently manage these documents, it's essential to categorize and classify them. Additionally, we need to ensure that they are saved and accessible long-term, as we never know when we need them. | ||
To address these challenges, I developed `jendeley`. It allows you to register both PDFs and webpages in the same database, making categorization easy through the use of tags. Moreover, the database is stored as a plain text JSON file, making it easily editable using your preferred editor. This means that even if `jendeley`'s development process ends, you can still access your information and create alternative applications to manage it. | ||
## Quickstart | ||
```console | ||
$ npm install @a_kawashiro/jendeley -g | ||
$ jendeley scan --papers_dir <YOUR PDFs DIR> | ||
$ jendeley launch --db <YOUR PDFs DIR>/jendeley_db.json | ||
``` | ||
npm install @a_kawashiro/jendeley -g | ||
jendeley scan --papers_dir <YOUR PDFs DIR> | ||
Then you can see a screen like this! | ||
![image](https://akawashiro.github.io/jendeley/blog100/top.png) | ||
## Install | ||
```console | ||
$ npm install @a_kawashiro/jendeley -g | ||
``` | ||
You can find the latest package at [npm page](https://www.npmjs.com/package/@a_kawashiro/jendeley). | ||
## Generate JSON database file | ||
```console | ||
$ jendeley scan --papers_dir <YOUR PDFs DIR> | ||
``` | ||
This command outputs the database to `<YOUR PDFs DIR>/jendeley_db.json`. If you have no PDF file, please specify an empty directory as `<YOUR PDFs DIR>`. | ||
If `jendeley` encounters an issue scanning some PDFs, it generates a shell script named `edit_and_run.sh`. Please refer to the following subsection to learn how to rename the files so that `jendeley` can properly recognize them.
### Recommended filename style | ||
`jendeley` uses a filename to find the document ID (e.g., [DOI](https://www.doi.org/) or [ISBN](https://en.wikipedia.org/wiki/ISBN)). `jendeley` recognizes parts of a filename that are not enclosed by `[` and `]` as the title of the file. I therefore recommend naming files accordingly, for example:
- `RustHorn CHC-based Verification for Rust Programs.pdf` | ||
- When the document's title includes spaces, the filename should include spaces. | ||
- `RustHorn CHC-based Verification for Rust Programs [matushita].pdf` | ||
- If you want to write additional information in a filename, please enclose it by `[` and `]`. | ||
### When `jendeley` fails to scan your PDFs | ||
`jendeley` heavily relies on [DOI](https://www.doi.org/) or [ISBN](https://en.wikipedia.org/wiki/ISBN) to find the title, authors, and year of publication of PDFs. When `jendeley` cannot find the DOI or ISBN automatically, you can specify it manually in the filename.
- To specify DOI, change the filename to include `[jendeley doi <DOI with all delimiters replaced with underscore>]`. | ||
- For example, `cyclone [jendeley doi 10_1145_512529_512563].pdf`. | ||
- To specify ISBN, change the filename to include `[jendeley isbn <ISBN>]`. | ||
- For example, `Types and Programming Languages [jendeley isbn 0262162091].pdf`. | ||
- When the PDF doesn't have any DOI or ISBN, you can specify it by `[jendeley no id]`. | ||
- For example, `ARM reference manual [jendeley no id].pdf`. | ||
## Launch the service daemon and open the web user interface | ||
``` | ||
jendeley launch --db <YOUR PDFs DIR>/jendeley_db.json | ||
``` | ||
Then you can see a screen like this! | ||
![Web UI](https://raw.githubusercontent.com/akawashiro/jendeley/c4aa45db6da5ff567b819bd3dfa1c40ed97dfe8d/jendeley-backend/webui.png "Web UI") | ||
This command launches the jendeley daemon and opens the web user interface in your web browser at [http://localhost:5000](http://localhost:5000). You have the option to change the default port by using the --port option. | ||
Please check [user document](https://akawashiro.github.io/jendeley/) for more details. | ||
### Launch `jendeley` automatically | ||
#### Linux | ||
When using Linux, you can set up `jendeley` to start automatically by using `systemd`. To do this, create a file named `~/.config/systemd/user/jendeley.service` with the following contents, and then run `systemctl --user enable jendeley && systemctl --user start jendeley`. Then, you can access `jendeley` at [http://localhost:5000](http://localhost:5000). Logs are accessible with the command `journalctl --user -f -u jendeley.service`. | ||
``` | ||
# jendeley.service | ||
[Unit] | ||
Description=jendeley JSON-based document organization software | ||
[Service] | ||
ExecStart=jendeley launch --db <FILL PATH TO THE YOUR DATABASE JSON FILE> | ||
[Install] | ||
WantedBy=default.target | ||
``` | ||
#### Windows | ||
When using Windows, you can set up `jendeley` to launch automatically at startup. To do this, first open the startup directory by pressing `Windows+R` and typing `shell:startup` and then pressing `Enter`. | ||
<img src="https://raw.githubusercontent.com/akawashiro/jendeley/main/win-startup.png" width="50%"> | ||
Then create `autorun-jendeley.bat` with the following contents using `notepad.exe`.
``` | ||
:: autorun-jendeley.bat | ||
jendeley launch --db <FILL PATH TO THE YOUR DATABASE JSON FILE> >> <FILL PATH TO THE LOG FILE> | ||
``` | ||
<img src="https://raw.githubusercontent.com/akawashiro/jendeley/main/startup-directory.png" width="70%"> | ||
## Use the web user interface | ||
When `jendeley` launches, `jendeley` opens the web user interface automatically. If not, please access `http://localhost:5000/`. | ||
![image](./blog100/top.png) | ||
### Add a webpage | ||
You can add a webpage to the database using the `REGISTER WEBPAGE` button. When you register a page, you can add tags and comments. Tags are entered as comma-separated (`,`) text. Date tags are added to the database automatically.
![Register webpage](./blog100/register_webpage.png "Register webpage") | ||
### Add a PDF file on the Web | ||
You can add a PDF file from the web to the database by using the `REGISTER PDF FROM URL` button. When you add the file, `jendeley` will attempt to locate its [Digital object identifier (DOI)](https://www.doi.org/) or [International Standard Book Number (ISBN)](https://en.wikipedia.org/wiki/ISBN) and register the meta information, such as the author's name or publication date, to the database. In some cases, `jendeley` may not be able to find the DOI or ISBN for the PDF, or there may not be a corresponding DOI or ISBN for it. In such situations, you can specify the DOI or ISBN by using the filename. For more information, please refer to the [Recommended filename style](#recommended-filename-style) section.
![Register PDF from URL](./blog100/register_pdf_from_url.png "Register PDF from URL") | ||
### Upload a PDF file | ||
You can upload a PDF file from your computer using the `UPLOAD PDF` button.
### Use tags and comments | ||
You can edit tags or comments after you register. You can edit tags or comments by double-clicking them. Comments are interpreted as Markdown. | ||
![Edit comments](./blog100/comments-markdown.png "Edit comments") | ||
Furthermore, you can filter the database using tags or comments. | ||
![Filter by tag](./blog100/filter_by_tag.png "Filter by tag") | ||
## Advanced topics | ||
### Check and edit your database (advanced) | ||
Because `jendeley` is fully JSON-based, you can quickly check the database's contents. | ||
```console | ||
$ cat jendeley_db.json | jq '.' | head | ||
{ | ||
"jendeley_meta": { | ||
"idType": "meta", | ||
"version": "0.0.17" | ||
}, | ||
"doi_10.1145/1122445.1122456": { | ||
"path": "/A Comprehensive Survey of Neural Architecture Search.pdf", | ||
"idType": "doi", | ||
"tags": [], | ||
"comments": "", | ||
``` | ||
You can edit your database using your preferred editor. However, after making the changes, it is important to verify that your database is still valid as a `jendeley` database using the command `jendeley validate --db <PATH TO THE DATABASE>`. | ||
### Check source code (advanced) | ||
You can check the source code at [https://github.com/akawashiro/jendeley](https://github.com/akawashiro/jendeley). Pull requests are welcome.
### Use LLM (Large Language Model) to generate tags (advanced) | ||
You can use an LLM to generate tags for your documents.
Launch the LLM server by running the following command. | ||
```console | ||
$ ./run_ollama.sh | ||
``` | ||
You can find `./run_ollama.sh` at [run_ollama.sh](https://github.com/akawashiro/jendeley/tree/main/jendeley-backend/run_ollama.sh). | ||
Then, you can enable automatic tagging by setting the `--experimental_use_ollama_server` option when launching `jendeley`. | ||
For example, | ||
```console | ||
$ jendeley launch --db <YOUR PDFs DIR>/jendeley_db.json --experimental_use_ollama_server | ||
``` | ||
To run the LLM server automatically, you can use the following `systemd` service file. | ||
```console | ||
$ cat ~/.config/systemd/user/ollama-jendeley.service | ||
# ollama-jendeley.service
[Unit]
Description=Ollama LLM server for jendeley tag generation
[Service] | ||
ExecStart=<PATH_TO_NODE>/node/v18.16.0/lib/node_modules/@a_kawashiro/jendeley/run_ollama.sh | ||
[Install] | ||
WantedBy=default.target | ||
$ systemctl --user enable ollama-jendeley | ||
$ systemctl --user start ollama-jendeley | ||
``` | ||
To check the LLM server's status, you can use the following command. | ||
```console | ||
$ journalctl --user -f -u ollama-jendeley.service | ||
``` | ||
## Contact me | ||
You can find me on Twitter at [https://twitter.com/a_kawashiro](https://twitter.com/a_kawashiro) and on Mastodon at [https://mstdn.jp/@a_kawashiro](https://mstdn.jp/@a_kawashiro). Additional contact information can be found on my website at [https://akawashiro.github.io/#links](https://akawashiro.github.io/#links). Also, feel free to create an issue or submit a pull request on [the repository](https://github.com/akawashiro/jendeley). | ||
## Support me | ||
Please star [akawashiro/jendeley](https://github.com/akawashiro/jendeley). It encourages me a lot. |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Long strings
Supply chain riskContains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
1721908
65
8286
200
1
15
9