openchemlib-utils
Advanced tools
Comparing version 1.0.0 to 1.1.0
# Changelog | ||
## [1.1.0](https://www.github.com/cheminfo/openchemlib-utils/compare/v1.0.0...v1.1.0) (2021-05-10) | ||
### Features | ||
* add MoleculesDB to store and search molecules ([da7cbc9](https://www.github.com/cheminfo/openchemlib-utils/commit/da7cbc9eb47f6232d566f98206c1df362faeb852)) | ||
## [1.0.0](https://www.github.com/cheminfo/openchemlib-utils/compare/v0.7.1...v1.0.0) (2021-03-24) | ||
@@ -4,0 +11,0 @@ |
416
lib/index.js
@@ -8,2 +8,5 @@ 'use strict'; | ||
var atomSorter = require('atom-sorter'); | ||
var ensureString = require('ensure-string'); | ||
var Papa = require('papaparse'); | ||
var sdfParser = require('sdf-parser'); | ||
@@ -14,2 +17,4 @@ function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; } | ||
var atomSorter__default = /*#__PURE__*/_interopDefaultLegacy(atomSorter); | ||
var Papa__default = /*#__PURE__*/_interopDefaultLegacy(Papa); | ||
var sdfParser__default = /*#__PURE__*/_interopDefaultLegacy(sdfParser); | ||
@@ -422,2 +427,3 @@ let xAtomicNumber = 0; | ||
* @param {array} [fragments] Array of {smiles,R1,R2,...} | ||
* @param {OCL} [OCL] The openchemlib library | ||
* @param {object} [options={}] | ||
@@ -1065,4 +1071,414 @@ * @param {function} [options.onStep] method to execute each new molecules | ||
/**
 * Build the lookup table that maps a lower-cased molecule field name to the
 * factory able to parse that representation.
 *
 * The factories are wrapped instead of being stored as bare references:
 * callers fetch a function from the map and call it directly, which would
 * otherwise invoke the static method without `Molecule` as `this`.
 * All arguments are forwarded unchanged.
 * @param {object} Molecule - class exposing fromIDCode, fromSmiles and fromMolfile
 * @returns {Map<string, function>} map with the keys 'oclid', 'idcode', 'smiles' and 'molfile'
 */
function getMoleculeCreators(Molecule) {
  const fromIDCode = (...args) => Molecule.fromIDCode(...args);
  const fromSmiles = (...args) => Molecule.fromSmiles(...args);
  const fromMolfile = (...args) => Molecule.fromMolfile(...args);
  return new Map([
    // 'oclid' and 'idcode' are two names for the same representation
    ['oclid', fromIDCode],
    ['idcode', fromIDCode],
    ['smiles', fromSmiles],
    ['molfile', fromMolfile],
  ]);
}
/** Default parser options; options supplied by the caller override them. */
const defaultCSVOptions = {
  header: true,
  dynamicTyping: true,
  skipEmptyLines: true,
};

/**
 * Parse a CSV document and append one database entry per data row.
 *
 * The molecule column is the one whose lower-cased header is a key of
 * getMoleculeCreators ('oclid', 'idcode', 'smiles' or 'molfile'); when several
 * columns qualify the last one wins. `moleculesDB.statistics` is replaced by
 * one `{label, isNumeric}` record per column, `isNumeric` being sampled on the
 * first data row only.
 * @param {object} moleculesDB - database receiving the entries (uses OCL, statistics and pushEntry)
 * @param {string|ArrayBuffer} csv - content of the CSV file
 * @param {object} [options={}] - merged over defaultCSVOptions and handed to the parser
 * @param {function} [options.onStep] - awaited after each row with (rowNumber, totalRows)
 * @returns {Promise<void>}
 * @throws {TypeError} if csv cannot be turned into a string
 * @throws {Error} if no column holds a molecule
 */
async function appendCSV(moleculesDB, csv, options = {}) {
  const { onStep } = options;
  const text = ensureString.ensureString(csv);
  if (typeof text !== 'string') {
    throw new TypeError('csv must be a string');
  }
  const moleculeCreators = getMoleculeCreators(moleculesDB.OCL.Molecule);
  const parsed = Papa__default['default'].parse(text, {
    ...defaultCSVOptions,
    ...options,
  });
  const rows = parsed.data;
  // the parser may not report field names (e.g. header disabled by the
  // caller); treat this as "no column" so the explicit error below is raised
  const fields = parsed.meta.fields || [];
  // a header-only document has no row to sample the column types from
  const firstRow = rows.length > 0 ? rows[0] : {};

  let moleculeCreator;
  let moleculeField;
  const stats = fields.map((field) => {
    const lowerField = field.toLowerCase();
    if (moleculeCreators.has(lowerField)) {
      moleculeCreator = moleculeCreators.get(lowerField);
      moleculeField = field;
    }
    return { label: field, isNumeric: typeof firstRow[field] === 'number' };
  });
  if (!moleculeCreator) {
    throw new Error('this document does not contain any molecule field');
  }
  moleculesDB.statistics = stats;

  for (let i = 0; i < rows.length; i++) {
    moleculesDB.pushEntry(moleculeCreator(rows[i][moleculeField]), rows[i]);
    if (onStep) {
      // sequential on purpose: the callback reports progress row by row
      await onStep(i + 1, rows.length);
    }
  }
}
/**
 * Parse an SDF document and append one database entry per record.
 *
 * `moleculesDB.statistics` is replaced by the statistics reported by the
 * parser; each parsed record is stored as the data of its entry.
 * @param {object} moleculesDB - database receiving the entries (uses OCL, statistics and pushEntry)
 * @param {string|ArrayBuffer} sdf - content of the SDF file
 * @param {object} [options={}]
 * @param {function} [options.onStep] - awaited after each record with (recordNumber, totalRecords)
 * @returns {Promise<void>}
 * @throws {TypeError} if sdf cannot be turned into a string
 */
async function appendSDF(moleculesDB, sdf, options = {}) {
  const notify = options.onStep;
  const text = ensureString.ensureString(sdf);
  if (typeof text !== 'string') {
    throw new TypeError('sdf must be a string');
  }
  const parsed = sdfParser__default['default'](text);
  moleculesDB.statistics = parsed.statistics;
  let position = 0;
  while (position < parsed.molecules.length) {
    const record = parsed.molecules[position];
    const parsedMolecule = moleculesDB.OCL.Molecule.fromMolfile(record.molfile);
    moleculesDB.pushEntry(parsedMolecule, record);
    position += 1;
    if (notify) {
      await notify(position, parsed.molecules.length);
    }
  }
}
/**
 * Attach `data` to the database entry of `molecule`, creating the entry the
 * first time this ID code is seen.
 *
 * A new entry gets its search index and molecular weight, taken from
 * `moleculeInfo` when provided there and computed from the molecule
 * otherwise. When `moleculesDB.computeProperties` is set, the exact mass,
 * molecular formula and the MoleculeProperties descriptors are stored too.
 * @param {object} moleculesDB - database to update (uses db, OCL and computeProperties)
 * @param {object} molecule - molecule instance
 * @param {object} [data={}] - free data appended to the entry
 * @param {object} [moleculeInfo={}] - may hold precalculated idCode, index and mw
 */
function pushEntry(moleculesDB, molecule, data = {}, moleculeInfo = {}) {
  // NOTE: a precalculated idCode may stop matching if the idCode version changes
  const idCode = moleculeInfo.idCode || molecule.getIDCode();

  let record = moleculesDB.db[idCode];
  if (!record) {
    // first occurrence of this molecule: register it before filling it
    record = { molecule, properties: {}, data: [], idCode };
    moleculesDB.db[idCode] = record;

    // helper arrays are required by the substructure search
    molecule.ensureHelperArrays(moleculesDB.OCL.Molecule.cHelperRings);
    record.index = moleculeInfo.index
      ? moleculeInfo.index
      : molecule.getIndex();

    let formula;
    if (moleculeInfo.mw) {
      record.properties.mw = moleculeInfo.mw;
    } else {
      formula = molecule.getMolecularFormula();
      record.properties.mw = formula.relativeWeight;
    }

    if (moleculesDB.computeProperties) {
      // reuse the formula when it was already needed for the weight
      formula = formula || molecule.getMolecularFormula();
      const computed = new moleculesDB.OCL.MoleculeProperties(molecule);
      record.properties.em = formula.absoluteWeight;
      record.properties.mf = formula.formula;
      const descriptors = [
        'acceptorCount',
        'donorCount',
        'logP',
        'logS',
        'polarSurfaceArea',
        'rotatableBondCount',
        'stereoCenterCount',
      ];
      for (const descriptor of descriptors) {
        record.properties[descriptor] = computed[descriptor];
      }
    }
  }
  record.data.push(data);
}
/**
 * Add an entry described by a plain object instead of a molecule instance.
 *
 * The molecule is built from the first representation available, in this
 * order of precedence: `idCode` (reusing the molecule already stored under
 * that ID code when there is one), `smiles`, `molfile`. Only the selected
 * representation is parsed. When none is present nothing is added.
 * @param {object} moleculesDB - database to update (uses OCL, db and pushEntry)
 * @param {object} moleculeInfo - may contain molfile, smiles, idCode, coordinates, index and mw
 * @param {object} [data={}] - free data attached to the entry
 * @throws {Error} if moleculeInfo is not an object (null included)
 */
function pushMoleculeInfo(moleculesDB, moleculeInfo, data = {}) {
  // typeof null is 'object', so null has to be rejected explicitly
  if (typeof moleculeInfo !== 'object' || moleculeInfo === null) {
    throw new Error('pushMoleculeInfo requires an object as first parameter');
  }
  const { Molecule } = moleculesDB.OCL;
  let molecule;
  if (moleculeInfo.idCode) {
    const known = moleculesDB.db[moleculeInfo.idCode];
    molecule = known
      ? known.molecule
      : Molecule.fromIDCode(
          moleculeInfo.idCode,
          moleculeInfo.coordinates || false,
        );
  } else if (moleculeInfo.smiles) {
    molecule = Molecule.fromSmiles(moleculeInfo.smiles);
  } else if (moleculeInfo.molfile) {
    molecule = Molecule.fromMolfile(moleculeInfo.molfile);
  }
  if (molecule) {
    moleculesDB.pushEntry(molecule, data, moleculeInfo);
  }
}
/**
 * Search the database and return the processed result.
 *
 * A string query is first converted to a molecule with the creator matching
 * `options.format` (case-insensitive); the search itself is delegated to
 * exactSearch, subStructureSearch or similaritySearch and the matching
 * entries are shaped by processResult.
 * @param {object} moleculesDB - database to search
 * @param {string|object} query - molecule instance, or its text representation
 * @param {object} [options={}]
 * @param {string} [options.format='idCode'] - format of a string query: 'smiles', 'molfile', 'oclid' or 'idcode'
 * @param {string} [options.mode='substructure'] - 'exact', 'substructure' or 'similarity'
 * @param {boolean} [options.flattenResult=true] - forwarded to processResult
 * @param {boolean} [options.keepMolecule=false] - forwarded to processResult
 * @param {number} [options.limit=Number.MAX_SAFE_INTEGER] - forwarded to processResult
 * @returns {Array} value returned by processResult
 * @throws {TypeError} if query is neither a string nor a Molecule
 * @throws {Error} if the format or the mode is unknown
 */
function search(moleculesDB, query, options = {}) {
  const {
    format = 'idCode',
    mode = 'substructure',
    flattenResult = true,
    keepMolecule = false,
    limit = Number.MAX_SAFE_INTEGER,
  } = options;

  let molecule = query;
  if (typeof query === 'string') {
    const moleculeCreators = getMoleculeCreators(moleculesDB.OCL.Molecule);
    const createMolecule = moleculeCreators.get(String(format).toLowerCase());
    if (!createMolecule) {
      // fail with an explicit message instead of "undefined is not a function"
      throw new Error(`unknown query format: ${format}`);
    }
    molecule = createMolecule(query);
  } else if (!(query instanceof moleculesDB.OCL.Molecule)) {
    throw new TypeError('query must be a Molecule or string');
  }

  let result;
  switch (String(mode).toLowerCase()) {
    case 'exact':
      result = exactSearch(moleculesDB, molecule);
      break;
    case 'substructure':
      result = subStructureSearch(moleculesDB, molecule);
      break;
    case 'similarity':
      result = similaritySearch(moleculesDB, molecule);
      break;
    default:
      throw new Error(`unknown search mode: ${mode}`);
  }
  return processResult(result, { flattenResult, keepMolecule, limit });
}
/**
 * Look up the entry stored under the ID code of the query.
 * @param {object} moleculesDB - database to search (uses db)
 * @param {object} query - molecule providing getIDCode()
 * @returns {Array} a one-element array with the matching entry, or an empty array
 */
function exactSearch(moleculesDB, query) {
  const match = moleculesDB.db[query.getIDCode()];
  if (match) {
    return [match];
  }
  return [];
}
/**
 * Return the entries that contain the query as a substructure, sorted by
 * increasing molecular weight difference with the query.
 *
 * A query without atoms matches every entry. The query is temporarily
 * switched to fragment mode when it is not already a fragment; the original
 * mode is restored even if the search throws, so the caller's molecule is
 * never left modified.
 * @param {object} moleculesDB - database to search (uses db and searcher)
 * @param {object} query - molecule used as substructure
 * @returns {Array} matching entries
 */
function subStructureSearch(moleculesDB, query) {
  const wasFragment = query.isFragment();
  if (!wasFragment) {
    query.setFragment(true);
  }
  try {
    const queryMW = getMW(query);
    const entries = Object.values(moleculesDB.db);
    let hits;
    if (query.getAllAtoms() === 0) {
      // an empty query matches everything
      hits = entries;
    } else {
      const searcher = moleculesDB.searcher;
      searcher.setFragment(query, query.getIndex());
      hits = entries.filter((entry) => {
        searcher.setMolecule(entry.molecule, entry.index);
        return searcher.isFragmentInMolecule();
      });
    }
    // `hits` is always a fresh array, so sorting in place is safe
    return hits.sort(
      (a, b) =>
        Math.abs(queryMW - a.properties.mw) -
        Math.abs(queryMW - b.properties.mw),
    );
  } finally {
    if (!wasFragment) {
      query.setFragment(false);
    }
  }
}
/**
 * Return every entry of the database ordered by decreasing similarity with
 * the query.
 *
 * The score is the Tanimoto similarity of the two indexes scaled by 1e6,
 * minus the molecular weight difference divided by 1e4. An entry with the
 * same ID code as the query gets Number.MAX_SAFE_INTEGER as score.
 * @param {object} moleculesDB - database to search (uses db and OCL.SSSearcherWithIndex)
 * @param {object} query - reference molecule
 * @returns {Array} all the entries, most similar first
 */
function similaritySearch(moleculesDB, query) {
  const referenceIndex = query.getIndex();
  const referenceMW = getMW(query);
  const referenceIdCode = query.getIDCode();

  const scored = [];
  for (const key in moleculesDB.db) {
    const entry = moleculesDB.db[key];
    let similarity = Number.MAX_SAFE_INTEGER;
    if (entry.idCode !== referenceIdCode) {
      const tanimoto = moleculesDB.OCL.SSSearcherWithIndex.getSimilarityTanimoto(
        referenceIndex,
        entry.index,
      );
      const weightPenalty = Math.abs(referenceMW - entry.properties.mw) / 10000;
      similarity = tanimoto * 1000000 - weightPenalty;
    }
    scored.push({ similarity, entry });
  }
  scored.sort((left, right) => right.similarity - left.similarity);
  return scored.map((item) => item.entry);
}
/**
 * Molecular weight of the query, computed on a compact copy with the
 * fragment flag cleared so the query itself is left untouched.
 * @param {object} query - molecule providing getCompactCopy()
 * @returns {number} relativeWeight of the molecular formula of the copy
 */
function getMW(query) {
  const plainCopy = query.getCompactCopy();
  plainCopy.setFragment(false);
  const { relativeWeight } = plainCopy.getMolecularFormula();
  return relativeWeight;
}
/**
 * Shape the entries returned by a search.
 *
 * With `flattenResult` one result is produced per data item of each entry;
 * otherwise one result per entry, carrying the whole data array. Building
 * stops as soon as `limit` results exist.
 *
 * `limit` is converted to a number, rounded down and clamped to 0, so a
 * fractional or negative value no longer fails with a RangeError; a value
 * that is not a number means "no limit".
 * @param {Iterable<object>} entries - database entries ({idCode, properties, data, molecule})
 * @param {object} [options={}]
 * @param {boolean} [options.flattenResult=true]
 * @param {boolean} [options.keepMolecule=false] - when false `molecule` is undefined in the results
 * @param {number} [options.limit=Number.MAX_SAFE_INTEGER] - maximal number of results
 * @returns {Array<object>} results of the form {data, idCode, properties, molecule}
 */
function processResult(entries, options = {}) {
  const {
    flattenResult = true,
    keepMolecule = false,
    limit = Number.MAX_SAFE_INTEGER,
  } = options;

  const numericLimit = Number(limit);
  const maxResults = Number.isNaN(numericLimit)
    ? Infinity
    : Math.max(0, Math.floor(numericLimit));

  const toResult = (entry, data) => ({
    data,
    idCode: entry.idCode,
    properties: entry.properties,
    molecule: keepMolecule ? entry.molecule : undefined,
  });

  const results = [];
  for (const entry of entries) {
    if (results.length >= maxResults) break;
    if (!flattenResult) {
      results.push(toResult(entry, entry.data));
      continue;
    }
    for (const data of entry.data) {
      if (results.length >= maxResults) break;
      results.push(toResult(entry, data));
    }
  }
  return results;
}
/*
    `this.db` is a plain object keyed by ID code. Each value is an entry
    with the following properties:
    * molecule: an OCL molecule instance
    * index: OCL index used for substructure searching
    * properties: all the calculated properties
    * data: array containing free data associated with this molecule
    * idCode: the key under which the entry is stored
*/
class MoleculesDB {
  /**
   * Create an empty database.
   * @param {OCL} OCL - the openchemlib library
   * @param {object} [options={}]
   * @param {boolean} [options.computeProperties=false] - store extra calculated properties for each new molecule
   */
  constructor(OCL, options = {}) {
    const computeProperties =
      options.computeProperties === undefined
        ? false
        : options.computeProperties;
    this.OCL = OCL;
    this.db = {};
    this.statistics = null;
    this.computeProperties = computeProperties;
    this.searcher = new OCL.SSSearcherWithIndex();
  }

  /**
   * Append the content of a CSV file to the database.
   * @param {string|ArrayBuffer} csv - content of the comma separated value file
   * @param {object} [options={}]
   * @param {boolean} [options.header=true]
   * @param {boolean} [options.dynamicTyping=true]
   * @param {boolean} [options.skipEmptyLines=true]
   * @param {function} [options.onStep] - callback executed after each molecule
   * @returns {Promise<void>}
   */
  appendCSV(csv, options) {
    const mergedOptions = {
      computeProperties: this.computeProperties,
      ...options,
    };
    return appendCSV(this, csv, mergedOptions);
  }

  /**
   * Append the content of an SDF file to the database.
   * @param {string|ArrayBuffer} sdf - content of the SDF file
   * @param {object} [options={}]
   * @param {function} [options.onStep] - callback executed after each molecule
   * @returns {Promise<void>}
   */
  appendSDF(sdf, options) {
    const mergedOptions = {
      computeProperties: this.computeProperties,
      ...options,
    };
    return appendSDF(this, sdf, mergedOptions);
  }

  /**
   * Add a molecule to the database.
   * @param {OCL.Molecule} molecule
   * @param {object} [data={}] - free data attached to the molecule
   * @param {object} [moleculeInfo={}] - may contain a precalculated idCode, index and mw
   */
  pushEntry(molecule, data, moleculeInfo) {
    pushEntry(this, molecule, data, moleculeInfo);
  }

  /**
   * Add an entry described by a plain object.
   * @param {object} moleculeInfo - molecule description that may contain molfile, smiles, idCode, coordinates, index and mw
   * @param {object} [data={}] - free data attached to the molecule
   */
  pushMoleculeInfo(moleculeInfo, data) {
    return pushMoleculeInfo(this, moleculeInfo, data);
  }

  /**
   * Search the database. Identical molecules are grouped in a single entry.
   * @param {string|OCL.Molecule} query - smiles, molfile, ID code or Molecule instance to look for
   * @param {object} [options={}]
   * @param {string} [options.format='idCode'] - format of a string query: 'smiles', 'oclid', 'idcode' or 'molfile'
   * @param {string} [options.mode='substructure'] - search by 'substructure', 'exact' or 'similarity'
   * @param {boolean} [options.flattenResult=true] - return one result per data item instead of one per molecule
   * @param {boolean} [options.keepMolecule=false] - keep the OCL.Molecule object in the result
   * @param {number} [options.limit=Number.MAX_SAFE_INTEGER] - maximal number of results
   * @returns {Array} array of objects of the type {(molecule), idCode, data, properties}
   */
  search(query, options) {
    return search(this, query, options);
  }

  /**
   * List the entries currently stored.
   * @returns {Array<object>} the values of `this.db`
   */
  getDB() {
    return Object.values(this.db);
  }
}
exports.FULL_HOSE_CODE = FULL_HOSE_CODE; | ||
exports.HOSE_CODE_CUT_C_SP3_SP3 = HOSE_CODE_CUT_C_SP3_SP3; | ||
exports.MoleculesDB = MoleculesDB; | ||
exports.addDiastereotopicMissingChirality = addDiastereotopicMissingChirality; | ||
@@ -1069,0 +1485,0 @@ exports.combineSmiles = combineSmiles; |
{ | ||
"name": "openchemlib-utils", | ||
"version": "1.0.0", | ||
"version": "1.1.0", | ||
"description": "", | ||
@@ -42,16 +42,19 @@ "main": "lib/index.js", | ||
"devDependencies": { | ||
"@babel/plugin-transform-modules-commonjs": "^7.13.8", | ||
"@babel/plugin-transform-modules-commonjs": "^7.14.0", | ||
"cheminfo-build": "^1.1.10", | ||
"eslint": "^7.22.0", | ||
"eslint-config-cheminfo": "^5.2.3", | ||
"eslint": "^7.26.0", | ||
"eslint-config-cheminfo": "^5.2.4", | ||
"esm": "^3.2.25", | ||
"jest": "^26.6.3", | ||
"openchemlib": "7.4.0", | ||
"prettier": "^2.2.1", | ||
"rollup": "^2.42.3" | ||
"prettier": "^2.3.0", | ||
"rollup": "^2.47.0" | ||
}, | ||
"dependencies": { | ||
"atom-sorter": "^1.1.9", | ||
"ensure-string": "^0.1.1", | ||
"ml-floyd-warshall": "^1.0.3", | ||
"ml-matrix": "^6.7.0" | ||
"ml-matrix": "^6.8.0", | ||
"papaparse": "^5.3.0", | ||
"sdf-parser": "^4.0.2" | ||
}, | ||
@@ -58,0 +61,0 @@ "peerDependencies": { |
@@ -21,1 +21,3 @@ export * from './diastereotopic/addDiastereotopicMissingChirality'; | ||
export * from './path/getShortestPaths'; | ||
export * from './db/MoleculesDB'; |
@@ -7,2 +7,3 @@ const MAX_R = 10; | ||
* @param {array} [fragments] Array of {smiles,R1,R2,...} | ||
* @param {OCL} [OCL] The openchemlib library | ||
* @param {object} [options={}] | ||
@@ -9,0 +10,0 @@ * @param {function} [options.onStep] method to execute each new molecules |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
104242
37
2797
7
+ Added ensure-string@^0.1.1
+ Added papaparse@^5.3.0
+ Added sdf-parser@^4.0.2
+ Added duplexify@4.1.3 (transitive)
+ Added end-of-stream@1.4.4 (transitive)
+ Added ensure-string@0.1.1 (transitive)
+ Added inherits@2.0.4 (transitive)
+ Added once@1.4.0 (transitive)
+ Added papaparse@5.4.1 (transitive)
+ Added pump@3.0.2 (transitive)
+ Added pumpify@2.0.1 (transitive)
+ Added readable-stream@3.6.2 (transitive)
+ Added safe-buffer@5.2.1 (transitive)
+ Added sdf-parser@4.0.2 (transitive)
+ Added split2@3.2.2 (transitive)
+ Added stream-shift@1.0.3 (transitive)
+ Added string_decoder@1.3.0 (transitive)
+ Added through2@3.0.2, 4.0.2 (transitive)
+ Added through2-filter@3.1.0 (transitive)
+ Added util-deprecate@1.0.2 (transitive)
+ Added wrappy@1.0.2 (transitive)
Updated ml-matrix@^6.8.0