@istex/istex-merge
Advanced tools
Comparing version 2.1.5 to 2.2.0
const generateMergedDocument = require('./src/generateMergedDocument'); | ||
const defaultMapping = require('./mapping/default.json'); | ||
const defaultRules = require('./rules/default.json'); | ||
const generateHalTEI = require('./src/generateHalTEI'); | ||
const generateHalTei = require('./src/generateHalTei'); | ||
@@ -10,3 +10,3 @@ module.exports = { | ||
defaultRules, | ||
generateHalTEI, | ||
generateHalTei, | ||
}; |
@@ -105,2 +105,47 @@ /* eslint-disable no-labels */ | ||
addRnsrInAuthors (authors) { | ||
for (const currentAuthor of authors) { | ||
// If the current author has no forename or no surname, go to the next author | ||
if (!currentAuthor.forename || !currentAuthor.surname) continue; | ||
for (const otherSource of this.getSourcesOrderedByPriority()) { | ||
for (const docObjectFromOtherSource of this.docObjects[otherSource]) { | ||
const authorsFromOtherSource = docObjectFromOtherSource.authors; | ||
// If there is no author in this source, go to the next source | ||
if (!authorsFromOtherSource) continue; | ||
// Get the authors from this source that have at least one affiliation with an RNSR | ||
const authorsFromOtherSourceThatHaveAffiliationsWithRnsr = _.filter(authorsFromOtherSource, author => { | ||
// Make sure the author has affiliations | ||
if (!SourceManager.isNonEmptyArray(author.affiliations)) return false; | ||
// Look for affiliations with an RNSR (the RNSR can come from an enrichment) | ||
for (const affiliation of author.affiliations) { | ||
if (SourceManager.isNonEmptyArray(affiliation.rnsr) || SourceManager.isNonEmptyArray(_.get(affiliation, 'enrichments.rnsr'))) { | ||
return true; | ||
} | ||
} | ||
}); | ||
// If the authors from this source don't have affiliations with an RNSR, go to the next source | ||
if (_.isEmpty(authorsFromOtherSourceThatHaveAffiliationsWithRnsr)) continue; | ||
for (const authorFromOtherSourceThatHaveAffiliationsWithRnsr of authorsFromOtherSourceThatHaveAffiliationsWithRnsr) { | ||
// If the current author from the other source is not our current author, go to the next one | ||
if (!SourceManager.authorsEqual(authorFromOtherSourceThatHaveAffiliationsWithRnsr, currentAuthor)) continue; | ||
if (!currentAuthor.rnsr) currentAuthor.rnsr = []; | ||
for (const affiliation of authorFromOtherSourceThatHaveAffiliationsWithRnsr.affiliations) { | ||
if (SourceManager.isNonEmptyArray(affiliation.rnsr)) { | ||
currentAuthor.rnsr = currentAuthor.rnsr.concat(affiliation.rnsr); | ||
} | ||
const rnsrFromEnrichments = _.get(affiliation, 'enrichments.rnsr'); | ||
if (SourceManager.isNonEmptyArray(rnsrFromEnrichments)) { | ||
currentAuthor.rnsr = currentAuthor.rnsr.concat(rnsrFromEnrichments); | ||
} | ||
} | ||
// If multiple sources give the same RNSR multiple times we want to remove the potential duplicates | ||
currentAuthor.rnsr = _.uniq(currentAuthor.rnsr); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
merge (source, mapping) { | ||
@@ -118,3 +163,6 @@ const result = { | ||
// Special treatment for the authors | ||
if (key === 'authors') this.addOrcIdInAuthors(value); | ||
if (key === 'authors') { | ||
this.addOrcIdInAuthors(value); | ||
this.addRnsrInAuthors(value); | ||
} | ||
result.properties[key] = source; | ||
@@ -121,0 +169,0 @@ _.set(result.data, key, value); |
@@ -75,21 +75,21 @@ { | ||
"sciencespo": true, | ||
"_technical": false, | ||
"_business.name": false, | ||
"_business.duplicateGenre": true, | ||
"_business.sourceUidChain": true, | ||
"_business.sources": false, | ||
"_business.duplicates": { | ||
"technical": false, | ||
"business.name": false, | ||
"business.duplicateGenre": true, | ||
"business.sourceUidChain": true, | ||
"business.sources": false, | ||
"business.duplicates": { | ||
"action": "merge", | ||
"id": "sourceUid" | ||
}, | ||
"_business.duplicateRules": true, | ||
"_business.isDuplicate": false, | ||
"_business.xPublicationDate": false, | ||
"_business.xissn": false, | ||
"_business.first3AuthorNames": false, | ||
"_business.first3AuthorNamesWithInitials": false, | ||
"_business.hasDoi": false, | ||
"_business.hasFulltext": false, | ||
"_business.isDeduplicable": false, | ||
"_business.hasTransDuplicate": false, | ||
"business.duplicateRules": true, | ||
"business.isDuplicate": false, | ||
"business.xPublicationDate": false, | ||
"business.xissn": false, | ||
"business.first3AuthorNames": false, | ||
"business.first3AuthorNamesWithInitials": false, | ||
"business.hasDoi": false, | ||
"business.hasFulltext": false, | ||
"business.isDeduplicable": false, | ||
"business.hasTransDuplicate": false, | ||
"title.default": true, | ||
@@ -96,0 +96,0 @@ "title.en": true, |
{ | ||
"name": "@istex/istex-merge", | ||
"version": "2.1.5", | ||
"version": "2.2.0", | ||
"description": "Library to build merged documents and generate Hal TEIs from them.", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -13,3 +13,3 @@ # istex-merge | ||
- [Example](#example) | ||
- [generateHalTEI](#generatehaltei) | ||
- [generateHalTei](#generatehaltei) | ||
- [Prerequisites](#prerequisites-1) | ||
@@ -47,3 +47,3 @@ - [Usage](#usage-1) | ||
// ... | ||
"_business.duplicates": { | ||
"business.duplicates": { | ||
"action": "merge", | ||
@@ -53,3 +53,3 @@ "id": "sourceUid" | ||
// ... | ||
"_business.hasFulltext": false, | ||
"business.hasFulltext": false, | ||
"fulltextUrl": true | ||
@@ -71,3 +71,3 @@ } | ||
- Fields with a simple value (like a string): you can specify a path to where the merged data will be in the final object. In the example above, the `sourceUid` field is merged and placed into `sourceUids` (we make it plural because the value becomes an array). | ||
- Fields with an array value (like `_business.duplicates`): a property (`sourceUid` in the example above) must be used to discriminate the values and remove potential duplicates if the values are objects. | ||
- Fields with an array value (like `business.duplicates`): a property (`sourceUid` in the example above) must be used to discriminate the values and remove potential duplicates if the values are objects. | ||
@@ -98,3 +98,3 @@ | ||
// ... | ||
"_business.hasFulltext": [/*...*/], | ||
"business.hasFulltext": [/*...*/], | ||
"fulltextUrl": [/*...*/] | ||
@@ -242,3 +242,3 @@ } | ||
## generateHalTEI | ||
## generateHalTei | ||
Function to generate a Hal TEI from a merged document. | ||
@@ -251,3 +251,3 @@ | ||
```JS | ||
const { generateMergedDocument, generateHalTEI } = require('@istex/istex-merge'); | ||
const { generateMergedDocument, generateHalTei } = require('@istex/istex-merge'); | ||
const docObjects = [{...}, {...}, {...}]; | ||
@@ -257,9 +257,9 @@ | ||
const halTEIAsString = generateHalTEI(mergedDocument); | ||
const halTeiAsString = generateHalTei(mergedDocument); | ||
``` | ||
You can also pass an `options` object to `generateHalTEI`. This object is passed as is to [xmlbuilder2](https://oozcitak.github.io/xmlbuilder2/) (the XML builder used by `istex-merge`). You can find all the available options [here](https://oozcitak.github.io/xmlbuilder2/serialization.html#serialization-settings). | ||
You can also pass an `options` object to `generateHalTei`. This object is passed as is to [xmlbuilder2](https://oozcitak.github.io/xmlbuilder2/) (the XML builder used by `istex-merge`). You can find all the available options [here](https://oozcitak.github.io/xmlbuilder2/serialization.html#serialization-settings). | ||
For example, you can use this `options` object to pretty print the TEI like so: | ||
```JS | ||
const prettyPrintedTEI = generateHalTEI(mergedDocument, { prettyPrint: true }); | ||
const prettyPrintedTei = generateHalTei(mergedDocument, { prettyPrint: true }); | ||
``` |
@@ -115,18 +115,18 @@ { | ||
"sciencespo": [], | ||
"_technical": [], | ||
"_business.name": [], | ||
"_business.duplicateGenre": [], | ||
"_business.sourceUidChain": [], | ||
"_business.sources": [], | ||
"_business.duplicates": [], | ||
"_business.duplicateRules": [], | ||
"_business.isDuplicate": [], | ||
"_business.xPublicationDate": [], | ||
"_business.xissn": [], | ||
"_business.first3AuthorNames": [], | ||
"_business.first3AuthorNamesWithInitials": [], | ||
"_business.hasDoi": [], | ||
"_business.hasFulltext": [], | ||
"_business.isDeduplicable": [], | ||
"_business.hasTransDuplicate": [], | ||
"technical": [], | ||
"business.name": [], | ||
"business.duplicateGenre": [], | ||
"business.sourceUidChain": [], | ||
"business.sources": [], | ||
"business.duplicates": [], | ||
"business.duplicateRules": [], | ||
"business.isDuplicate": [], | ||
"business.xPublicationDate": [], | ||
"business.xissn": [], | ||
"business.first3AuthorNames": [], | ||
"business.first3AuthorNamesWithInitials": [], | ||
"business.hasDoi": [], | ||
"business.hasFulltext": [], | ||
"business.isDeduplicable": [], | ||
"business.hasTransDuplicate": [], | ||
"title.default": [ | ||
@@ -133,0 +133,0 @@ "sudoc-theses", |
@@ -55,18 +55,18 @@ { | ||
"sciencespo": [], | ||
"_technical": [], | ||
"_business.name": [], | ||
"_business.duplicateGenre": [], | ||
"_business.sourceUidChain": [], | ||
"_business.sources": [], | ||
"_business.duplicates": [], | ||
"_business.duplicateRules": [], | ||
"_business.isDuplicate": [], | ||
"_business.xPublicationDate": [], | ||
"_business.xissn": [], | ||
"_business.first3AuthorNames": [], | ||
"_business.first3AuthorNamesWithInitials": [], | ||
"_business.hasDoi": [], | ||
"_business.hasFulltext": [], | ||
"_business.isDeduplicable": [], | ||
"_business.hasTransDuplicate": [], | ||
"technical": [], | ||
"business.name": [], | ||
"business.duplicateGenre": [], | ||
"business.sourceUidChain": [], | ||
"business.sources": [], | ||
"business.duplicates": [], | ||
"business.duplicateRules": [], | ||
"business.isDuplicate": [], | ||
"business.xPublicationDate": [], | ||
"business.xissn": [], | ||
"business.first3AuthorNames": [], | ||
"business.first3AuthorNamesWithInitials": [], | ||
"business.hasDoi": [], | ||
"business.hasFulltext": [], | ||
"business.isDeduplicable": [], | ||
"business.hasTransDuplicate": [], | ||
"title.default": [], | ||
@@ -73,0 +73,0 @@ "title.en": [], |
@@ -32,3 +32,3 @@ const _ = require('lodash'); | ||
if (sourceManager.hasSource('hal') && !sourceManager.getPropertyOf('hal', '_business.hasFulltext')) { | ||
if (sourceManager.hasSource('hal') && !sourceManager.getPropertyOf('hal', 'business.hasFulltext')) { | ||
rules = halWithoutFulltextRules; | ||
@@ -35,0 +35,0 @@ } |
84966
964