New Case Study:See how Anthropic automated 95% of dependency reviews with Socket.Learn More
Socket
Sign inDemoInstall
Socket

getpapers

Package Overview
Dependencies
Maintainers
1
Versions
31
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

getpapers - npm Package Compare versions

Comparing version 0.0.5 to 0.0.6

13

bin/getpapers.js

@@ -18,2 +18,8 @@ #!/usr/bin/env node

'not found)')
.option('-x, --xml',
'Download fulltext XMLs if available')
.option('-p, --pdf',
'Download fulltext PDFs if available')
.option('-s, --supp',
'Download supplementary files if available')
.option('-l, --loglevel <level>',

@@ -66,5 +72,10 @@ 'amount of information to log ' +

var options = {}
options.xml = program.xml;
options.pdf = program.pdf;
options.supp = program.supp;
mkdirp.sync(program.outdir);
process.chdir(program.outdir);
var eupmc = new EuPmc();
var eupmc = new EuPmc(options);
eupmc.search(program.query);

@@ -10,3 +10,3 @@ var rest = require('restler')

var EuPmc = function() {
var EuPmc = function(opts) {

@@ -16,2 +16,4 @@ this.baseurl = 'http://www.ebi.ac.uk/' +

this.opts = opts;
}

@@ -148,4 +150,16 @@

// download the fullText XML
eupmc.downloadFulltextXMLs();
if (eupmc.opts.xml) {
eupmc.downloadFulltextXMLs();
}
//download the fullText PDF
if (eupmc.opts.pdf) {
eupmc.downloadFulltextPDFs();
}
// download the supplementary files
if (eupmc.opts.supp) {
eupmc.downloadSuppFiles();
}
}

@@ -181,6 +195,74 @@

eupmc.downloadUrls(urls, 'XML', 'fulltext.html',
eupmc.downloadUrls(urls, 'XML', 'fulltext.xml',
failed, done, eupmc);
}
/**
 * Download the open-access fulltext PDF of every search result that has one.
 *
 * Maps `this.allresults` through `getFulltextPDFUrl`, drops the results for
 * which that returned null, and hands the remaining URLs to `downloadUrls`
 * with the target file name 'fulltext.pdf'. If the `failed` list is non-empty
 * once a pass has finished, the batch is attempted exactly one more time.
 * Calls `process.exit(0)` when a pass finishes with no failures.
 *
 * NOTE(review): this relies on `downloadUrls` invoking the callback once per
 * URL and recording failures in the `failed` array it is given - confirm
 * against its implementation. The retry re-submits the full URL list, as the
 * original code did, because the contents of `failed` are not visible here.
 */
EuPmc.prototype.downloadFulltextPDFs = function() {
  var eupmc = this;
  log.info('Extracting fulltext PDF URLlist');
  // Declared with `var`: the bare assignment used before leaked a global.
  var urls = eupmc.allresults
    .map(eupmc.getFulltextPDFUrl, eupmc)
    .filter(function(x) { return x !== null; });
  log.info('Downloading fulltext PDF files');
  var failed = [];
  var retries = 0;

  // Runs one pass over `urls`. Every pass gets a fresh `_.after` counter;
  // reusing the counter of a finished pass would fire the completion handler
  // on the very first callback of the retry.
  var startPass = function() {
    failed = [];
    var done = _.after(urls.length, function() {
      if (failed.length > 0 && retries === 0) {
        // Count the retry so that a second failing pass is not retried again.
        retries += 1;
        log.info(failed.length + ' downloads timed out. Retrying.');
        startPass();
      } else if (failed.length > 0) {
        log.info(failed.length + ' downloads timed out on retry. Skipping.');
      } else {
        log.info('All downloads succeeded!');
        process.exit(0);
      }
    });
    eupmc.downloadUrls(urls, 'PDF', 'fulltext.pdf',
                       failed, done, eupmc);
  };

  startPass();
};
/**
 * Download the supplementary-files archive of every search result that has
 * a supplementary-files URL.
 *
 * Maps `this.allresults` through `getSuppFilesUrl`, drops the results for
 * which that returned null, and hands the remaining URLs to `downloadUrls`
 * with the target file name 'supplementaryFiles.zip'. If the `failed` list is
 * non-empty once a pass has finished, the batch is attempted exactly one more
 * time. Calls `process.exit(0)` when a pass finishes with no failures.
 *
 * NOTE(review): this relies on `downloadUrls` invoking the callback once per
 * URL and recording failures in the `failed` array it is given - confirm
 * against its implementation. The retry re-submits the full URL list, as the
 * original code did, because the contents of `failed` are not visible here.
 */
EuPmc.prototype.downloadSuppFiles = function() {
  var eupmc = this;
  log.info('Extracting supplementary file URLlist');
  // Declared with `var`: the bare assignment used before leaked a global.
  var urls = eupmc.allresults
    .map(eupmc.getSuppFilesUrl, eupmc)
    .filter(function(x) { return x !== null; });
  log.info('Downloading supplementary files');
  var failed = [];
  var retries = 0;

  // Runs one pass over `urls`. Every pass gets a fresh `_.after` counter;
  // reusing the counter of a finished pass would fire the completion handler
  // on the very first callback of the retry.
  var startPass = function() {
    failed = [];
    var done = _.after(urls.length, function() {
      if (failed.length > 0 && retries === 0) {
        // Count the retry so that a second failing pass is not retried again.
        retries += 1;
        log.info(failed.length + ' downloads timed out. Retrying.');
        startPass();
      } else if (failed.length > 0) {
        log.info(failed.length + ' downloads timed out on retry. Skipping.');
      } else {
        log.info('All downloads succeeded!');
        process.exit(0);
      }
    });
    eupmc.downloadUrls(urls,
                       'supplementary files',
                       'supplementaryFiles.zip',
                       failed, done, eupmc);
  };

  startPass();
};
EuPmc.prototype.downloadUrls = function(urls, type, rename, failed, cb, thisArg) {

@@ -205,3 +287,7 @@

log.debug('Downloading ' + type + ': ' + url);
var get = got(url, {timeout: 15000}, function(err, data, resp) {
var options = {
timeout: 15000,
encoding: null
}
var get = got(url, options, function(err, data) {
dlprogress.tick();

@@ -278,2 +364,41 @@ if (err) {

/**
 * Pick the open-access fulltext PDF URL out of a single search result.
 *
 * Looks at `result.fullTextUrlList[0].fullTextUrl` and keeps the entries
 * whose `documentStyle[0]` is 'pdf' and whose `availabilityCode[0]` is 'OA'.
 *
 * @param {Object} result - one search result record.
 * @returns {?string} `url[0]` of the first matching entry, or null (after
 *   logging a warning) when there is none.
 */
EuPmc.prototype.getFulltextPDFUrl = function(result) {
  var eupmc = this;
  // A result may not carry a fullTextUrlList at all (presumably records with
  // no fulltext links - confirm against the API response). Treat that as
  // "no PDF available" instead of throwing and aborting the whole batch.
  var urlList = result.fullTextUrlList;
  var urls = (urlList && urlList[0] && urlList[0].fullTextUrl) || [];
  var pdfOAurls = urls.filter(function(u) {
    return u.documentStyle[0] === 'pdf' &&
      u.availabilityCode[0] === 'OA';
  });
  if (pdfOAurls.length === 0) {
    var id = eupmc.getIdentifier(result);
    log.warn('Article with ' + id.type + ' "' +
             id.id + '" had no fulltext PDF url');
    return null;
  }
  return pdfOAurls[0].url[0];
};
/**
 * Build the supplementary-files URL for a single search result.
 *
 * The URL is formed from the identifier returned by `getIdentifier`, and only
 * when that identifier's type is 'pmcid'.
 *
 * @param {Object} result - one search result record.
 * @returns {?string} the supplementaryFiles endpoint URL for the article, or
 *   null (after logging a warning) when the identifier is not a PMCID.
 */
EuPmc.prototype.getSuppFilesUrl = function(result) {
  var eupmc = this;
  var id = eupmc.getIdentifier(result);
  if (id.type === 'pmcid') {
    return 'http://www.ebi.ac.uk/europepmc/webservices/rest/' +
      id.id + '/supplementaryFiles';
  }
  // The closing quote after the id was missing from this message.
  log.warn('Article with ' + id.type + ' "' +
           id.id + '" did not have a PMCID (therefore no supplementary files)');
  return null;
};
module.exports = EuPmc;

2

package.json
{
"name": "getpapers",
"description": "Get fulltexts or fulltext URLs of papers matching a search query",
"version": "0.0.5",
"version": "0.0.6",
"homepage": "https://github.com/ContentMine/getpapers",

@@ -6,0 +6,0 @@ "author": {

# getpapers
Get fulltexts or fulltext URLs of papers matching a PubMed search query.
Get fulltexts or fulltext URLs of papers matching a search query using the EuropePMC API.
Uses the EuropePMC API.
getpapers can fetch article metadata, fulltexts (PDF or XML), and supplementary materials. It's designed for use in content mining, but you may find it useful for quickly acquiring large numbers of papers for reading.

@@ -26,2 +26,5 @@ ## Installation

-o, --outdir <path> Output directory (required - will be created if not found)
-x, --xml Download fulltext XMLs if available
-p, --pdf Download fulltext PDFs if available
-s, --supp Download supplementary files if available
-l, --loglevel <level> amount of information to log (silent, verbose, info*, data, warn, error, or debug)

@@ -34,1 +37,68 @@

![screenshot](https://raw.githubusercontent.com/ContentMine/getpapers/master/docs/screenshot.png)
## Query format
Queries are processed by EuropePMC. In their simplest form, they can be free text, like this:
```
--query 'brain tumour rnaseq'
```
But they can also be much more detailed, using the EuropePMC webservice's query language (see Appendix 1 of [the EuropePMC reference PDF](http://europepmc.org/docs/EBI_Europe_PMC_Web_Service_Reference.pdf)).
For example we can restrict our search to only papers that mention 'transcriptome assembly' in the methods:
```
--query 'METHODS:"transcriptome assembly"'
```
Or to only papers with a CC-BY license:
```
--query 'LICENSE:"cc by" OR LICENSE:"cc-by"'
```
Note that in this case, we combine two restrictions using the logical `OR` keyword. We can also use `AND`, and can group operations using brackets:
```
--query '(LICENSE:"cc by" OR LICENSE:"cc-by") AND METHODS:"transcriptome assembly"'
```
A selection of the most commonly useful search fields is explained below...
### Restrict search by bibliographic metadata
| Field | Description | Example |
|-----------|---------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------|
| `PMCID:` | Search for a publication by its PubMed Central ID, where applicable (i.e. available as full text) | `PMCID:PMC1287967` |
| `TITLE:` | Search for a term or terms in publication titles | `TITLE:aspirin`, `TITLE:"protein knowledgebase"` |
| `ABSTRACT:` | Search for a term or terms in publication abstracts | `ABSTRACT:malaria`, `ABSTRACT:"chicken pox"` |
| `AUTH:` | Search for a surname and (optionally) initial(s) in publication author lists | `AUTH:einstein`, `AUTH:"Smith AB"` |
| `JOURNAL:` | Journal title – searchable either in full or abbreviated form | `JOURNAL:"biology letters"`, `JOURNAL:"biol lett"` |
| `LICENSE:` | Search for content according to the assigned Creative Commons license (where provided). | `LICENSE:"cc by" OR LICENSE:"cc-by"`, `LICENSE:cc` |
### Restrict by article metadata
| Field | Description | Example |
|---------------|--------------------------------------------------|-------------------------------------------------------|
| `DISEASE:` | Search for mined diseases | `DISEASE:dysthymias` |
| `GENE_PROTEIN:` | Search for records that have GENE_PROTEINS mined | `GENE_PROTEIN:gng11` |
| `GOTERM:` | Search for records that have GOTERM mined | `GOTERM:apoptosis` |
| `CHEM:` | Limit your search by MeSH substance | `CHEM:propantheline`, `CHEM:"protein kinases"` |
| `ORGANISM:` | Search for mined organisms | `ORGANISM:terebratulide` |
| `PUB_TYPE:` | Limit your search by publication type | `PUB_TYPE:review`, `PUB_TYPE:"retraction of publication"` |
### Section-level search
| Field | Description | Example |
|------------|----------------------------------------------------------------------|--------------------------------|
| `INTRO:` | Find articles with a phrase in the Introduction & Background section | `INTRO:"protein interactions"` |
| `METHODS:` | Find articles with a phrase in the Materials & Methods section | `METHODS:"yeast two-hybrid"` |
| `RESULTS:` | Find articles with a phrase in the Results section | `RESULTS:"in vivo"` |
| `DISCUSS:` | Find articles with a phrase in the Discussion section | `DISCUSS:cardiovascular` |
## License
Copyright (c) 2014 Shuttleworth Foundation
Licensed under the MIT license

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc