Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

openchemlib-utils

Package Overview
Dependencies
Maintainers
4
Versions
94
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

openchemlib-utils - npm Package Compare versions

Comparing version 1.0.0 to 1.1.0

src/db/MoleculesDB.js

7

CHANGELOG.md
# Changelog
## [1.1.0](https://www.github.com/cheminfo/openchemlib-utils/compare/v1.0.0...v1.1.0) (2021-05-10)
### Features
* add MoleculesDB to store and search molecules ([da7cbc9](https://www.github.com/cheminfo/openchemlib-utils/commit/da7cbc9eb47f6232d566f98206c1df362faeb852))
## [1.0.0](https://www.github.com/cheminfo/openchemlib-utils/compare/v0.7.1...v1.0.0) (2021-03-24)

@@ -4,0 +11,0 @@

@@ -8,2 +8,5 @@ 'use strict';

var atomSorter = require('atom-sorter');
var ensureString = require('ensure-string');
var Papa = require('papaparse');
var sdfParser = require('sdf-parser');

@@ -14,2 +17,4 @@ function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; }

var atomSorter__default = /*#__PURE__*/_interopDefaultLegacy(atomSorter);
var Papa__default = /*#__PURE__*/_interopDefaultLegacy(Papa);
var sdfParser__default = /*#__PURE__*/_interopDefaultLegacy(sdfParser);

@@ -422,2 +427,3 @@ let xAtomicNumber = 0;

* @param {array} [fragments] Array of {smiles,R1,R2,...}
* @param {OCL} [OCL] The openchemlib library
* @param {object} [options={}]

@@ -1065,4 +1071,414 @@ * @param {function} [options.onStep] method to execute each new molecules

/**
 * Build the table that maps a lower-case format name to the `Molecule`
 * factory used to parse a value in that format.
 * 'oclid' and 'idcode' are two names for the same factory.
 * @param {object} Molecule - object exposing `fromIDCode`, `fromSmiles` and `fromMolfile`
 * @returns {Map<string, function>} factories keyed by 'oclid', 'idcode', 'smiles' and 'molfile'
 */
function getMoleculeCreators(Molecule) {
  const { fromIDCode, fromSmiles, fromMolfile } = Molecule;
  return new Map([
    ['oclid', fromIDCode],
    ['idcode', fromIDCode],
    ['smiles', fromSmiles],
    ['molfile', fromMolfile],
  ]);
}
/**
 * Parser options that `appendCSV` applies unless the caller overrides them.
 * The object is frozen so the shared defaults cannot be changed by accident;
 * `appendCSV` only reads it through an object spread.
 * NOTE(review): the meaning of each flag is defined by the CSV parser
 * (Papa Parse) and not by this file - see its configuration documentation.
 */
const defaultCSVOptions = Object.freeze({
  header: true,
  dynamicTyping: true,
  skipEmptyLines: true,
});
/**
 * Parse a CSV document and add one database entry per parsed row.
 *
 * The column whose lower-cased header is known to `getMoleculeCreators`
 * provides the structure; when several columns qualify the last one wins.
 * The complete parsed row is stored as the data of the entry, and
 * `moleculesDB.statistics` is replaced by one `{label, isNumeric}` object per
 * column (`isNumeric` is derived from the first row only).
 *
 * @param {MoleculesDB} moleculesDB - database receiving the entries
 * @param {string|ArrayBuffer} csv - content of the CSV file
 * @param {object} [options={}] - merged over `defaultCSVOptions` and forwarded to the CSV parser
 * @param {function} [options.onStep] - awaited after each row with (rowsDone, totalRows)
 * @returns {Promise<void>}
 * @throws {TypeError} if `csv` is still not a string after `ensureString`
 * @throws {Error} if no column contains a molecule
 */
async function appendCSV(moleculesDB, csv, options = {}) {
  const { onStep } = options;
  const text = ensureString.ensureString(csv);
  const moleculeCreators = getMoleculeCreators(moleculesDB.OCL.Molecule);
  if (typeof text !== 'string') {
    throw new TypeError('csv must be a string');
  }
  const parserOptions = { ...defaultCSVOptions, ...options };
  const parsed = Papa__default['default'].parse(text, parserOptions);
  // The parser may not report any field list (presumably when the caller
  // disables `header`); treat that as "no column", which ends in the
  // explicit "no molecule field" error below instead of a crash.
  const fields = parsed.meta.fields || [];
  const rows = parsed.data;
  // A document made of a header line only has no first row to inspect.
  const firstRow = rows.length > 0 ? rows[0] : {};
  const stats = [];
  let moleculeCreator;
  let moleculeField;
  for (const field of fields) {
    stats.push({
      label: field,
      isNumeric: typeof firstRow[field] === 'number',
    });
    const creator = moleculeCreators.get(field.toLowerCase());
    if (creator) {
      moleculeCreator = creator;
      moleculeField = field;
    }
  }
  if (!moleculeCreator) {
    throw new Error('this document does not contain any molecule field');
  }
  moleculesDB.statistics = stats;
  for (let i = 0; i < rows.length; i++) {
    moleculesDB.pushEntry(moleculeCreator(rows[i][moleculeField]), rows[i]);
    if (onStep) {
      await onStep(i + 1, rows.length);
    }
  }
}
/**
 * Parse an SDF document and add one database entry per record.
 * Each record's `molfile` is turned into a molecule and the whole record is
 * stored as the data of the entry; `moleculesDB.statistics` is replaced by
 * the statistics reported by the SDF parser.
 * @param {MoleculesDB} moleculesDB - database receiving the entries
 * @param {string|ArrayBuffer} sdf - content of the SDF file
 * @param {object} [options={}]
 * @param {function} [options.onStep] - awaited after each record with (recordsDone, totalRecords)
 * @returns {Promise<void>}
 * @throws {TypeError} if `sdf` is still not a string after `ensureString`
 */
async function appendSDF(moleculesDB, sdf, options = {}) {
  const { onStep } = options;
  sdf = ensureString.ensureString(sdf);
  if (typeof sdf !== 'string') throw new TypeError('sdf must be a string');

  const parsed = sdfParser__default['default'](sdf);
  moleculesDB.statistics = parsed.statistics;

  let position = 0;
  while (position < parsed.molecules.length) {
    const record = parsed.molecules[position];
    const structure = moleculesDB.OCL.Molecule.fromMolfile(record.molfile);
    moleculesDB.pushEntry(structure, record);
    position += 1;
    if (onStep) await onStep(position, parsed.molecules.length);
  }
}
/**
 * Store `data` under the entry of `molecule`, creating the entry first when
 * the molecule is not in the database yet.
 * Entries are keyed by idCode; a new entry receives the molecule, its search
 * index, its molecular weight and, when `moleculesDB.computeProperties` is
 * set, a set of calculated properties.
 * @param {MoleculesDB} moleculesDB - database to update
 * @param {OCL.Molecule} molecule
 * @param {object} [data={}] - free data appended to the entry
 * @param {object} [moleculeInfo={}] - optional precalculated `idCode`, `index` and `mw`
 */
function pushEntry(moleculesDB, molecule, data = {}, moleculeInfo = {}) {
  // the following line could be the source of problems if the idCode version
  // changes
  const idCode = moleculeInfo.idCode || molecule.getIDCode();

  const known = moleculesDB.db[idCode];
  if (known) {
    // molecule already stored: only the data is appended
    known.data.push(data);
    return;
  }

  const entry = { molecule, properties: {}, data: [], idCode };
  moleculesDB.db[idCode] = entry;

  // ensure helper arrays needed for substructure search
  molecule.ensureHelperArrays(moleculesDB.OCL.Molecule.cHelperRings);
  entry.index = moleculeInfo.index || molecule.getIndex();

  // the molecular formula is requested at most once, and only if needed
  let formula;
  const getFormula = () => {
    if (!formula) {
      formula = molecule.getMolecularFormula();
    }
    return formula;
  };

  entry.properties.mw = moleculeInfo.mw || getFormula().relativeWeight;

  if (moleculesDB.computeProperties) {
    const mf = getFormula();
    const computed = new moleculesDB.OCL.MoleculeProperties(molecule);
    Object.assign(entry.properties, {
      em: mf.absoluteWeight,
      mf: mf.formula,
      acceptorCount: computed.acceptorCount,
      donorCount: computed.donorCount,
      logP: computed.logP,
      logS: computed.logS,
      polarSurfaceArea: computed.polarSurfaceArea,
      rotatableBondCount: computed.rotatableBondCount,
      stereoCenterCount: computed.stereoCenterCount,
    });
  }

  entry.data.push(data);
}
/**
 * Add an entry described by a plain object instead of a molecule instance.
 *
 * The structure is taken from the first available field in this order of
 * priority: `idCode` (reusing the molecule already stored under that idCode
 * when there is one), then `smiles`, then `molfile`. Only the field that is
 * actually used gets parsed. When none of them is present nothing is added.
 *
 * @param {MoleculesDB} moleculesDB - database to update
 * @param {object} moleculeInfo - may contain `molfile`, `smiles`, `idCode`
 *   and `coordinates`; the object is also forwarded to `pushEntry`
 * @param {object} [data={}] - free data appended to the entry
 * @throws {Error} if `moleculeInfo` is not an object (`null` included)
 */
function pushMoleculeInfo(moleculesDB, moleculeInfo, data = {}) {
  // `typeof null` is 'object', so null has to be rejected explicitly
  if (typeof moleculeInfo !== 'object' || moleculeInfo === null) {
    throw new Error('pushMoleculeInfo requires an object as first parameter');
  }
  const Molecule = moleculesDB.OCL.Molecule;
  let molecule;
  if (moleculeInfo.idCode) {
    const known = moleculesDB.db[moleculeInfo.idCode];
    molecule = known
      ? known.molecule
      : Molecule.fromIDCode(
          moleculeInfo.idCode,
          moleculeInfo.coordinates || false,
        );
  } else if (moleculeInfo.smiles) {
    molecule = Molecule.fromSmiles(moleculeInfo.smiles);
  } else if (moleculeInfo.molfile) {
    molecule = Molecule.fromMolfile(moleculeInfo.molfile);
  }
  if (molecule) {
    moleculesDB.pushEntry(molecule, data, moleculeInfo);
  }
}
/**
 * Search the database for a query molecule and format the matches.
 *
 * A string query is first converted to a molecule with the factory that
 * `getMoleculeCreators` registers for `options.format` (compared in lower
 * case). The search itself is delegated to `exactSearch`,
 * `subStructureSearch` or `similaritySearch`, and the matches are shaped by
 * `processResult`.
 *
 * @param {MoleculesDB} moleculesDB - database to search
 * @param {string|OCL.Molecule} query - text in `options.format` or a molecule instance
 * @param {object} [options={}]
 * @param {string} [options.format='idCode'] - format of a string query
 * @param {string} [options.mode='substructure'] - 'substructure', 'exact' or 'similarity' (case insensitive)
 * @param {boolean} [options.flattenResult=true] - forwarded to `processResult`
 * @param {boolean} [options.keepMolecule=false] - forwarded to `processResult`
 * @param {number} [options.limit=Number.MAX_SAFE_INTEGER] - forwarded to `processResult`
 * @returns {Array} value returned by `processResult`
 * @throws {TypeError} if the query is neither a string nor a molecule, or if
 *   no factory exists for `options.format`
 * @throws {Error} if `options.mode` is unknown
 */
function search(moleculesDB, query, options = {}) {
  const {
    format = 'idCode',
    mode = 'substructure',
    flattenResult = true,
    keepMolecule = false,
    limit = Number.MAX_SAFE_INTEGER,
  } = options;
  let queryMolecule = query;
  if (typeof query === 'string') {
    const moleculeCreators = getMoleculeCreators(moleculesDB.OCL.Molecule);
    const createMolecule = moleculeCreators.get(String(format).toLowerCase());
    if (!createMolecule) {
      // without this guard an unsupported format ends in an opaque
      // "... is not a function" TypeError
      throw new TypeError(`unknown query format: ${format}`);
    }
    queryMolecule = createMolecule(query);
  } else if (!(query instanceof moleculesDB.OCL.Molecule)) {
    throw new TypeError('toSearch must be a Molecule or string');
  }
  let result;
  switch (mode.toLowerCase()) {
    case 'exact':
      result = exactSearch(moleculesDB, queryMolecule);
      break;
    case 'substructure':
      result = subStructureSearch(moleculesDB, queryMolecule);
      break;
    case 'similarity':
      result = similaritySearch(moleculesDB, queryMolecule);
      break;
    default:
      throw new Error(`unknown search mode: ${options.mode}`);
  }
  return processResult(result, { flattenResult, keepMolecule, limit });
}
/**
 * Look up the entry stored under the idCode of the query.
 * @param {MoleculesDB} moleculesDB - database to search
 * @param {OCL.Molecule} query
 * @returns {Array} array with the matching entry, or an empty array
 */
function exactSearch(moleculesDB, query) {
  const match = moleculesDB.db[query.getIDCode()];
  if (match) {
    return [match];
  }
  return [];
}
/**
 * Return the entries whose molecule contains the query as a substructure,
 * ordered by increasing difference between their molecular weight and the
 * molecular weight of the query.
 *
 * A query without any atom matches every entry. The query is switched to
 * fragment mode for the duration of the search when it is not a fragment
 * yet, and its original state is restored afterwards, even if the search
 * throws.
 *
 * @param {MoleculesDB} moleculesDB - database providing `db` and `searcher`
 * @param {OCL.Molecule} query
 * @returns {Array} matching entries
 */
function subStructureSearch(moleculesDB, query) {
  const resetFragment = !query.isFragment();
  if (resetFragment) {
    query.setFragment(true);
  }
  try {
    const queryMW = getMW(query);
    const entries = Object.values(moleculesDB.db);
    let searchResult;
    if (query.getAllAtoms() === 0) {
      searchResult = entries;
    } else {
      const searcher = moleculesDB.searcher;
      searcher.setFragment(query, query.getIndex());
      searchResult = entries.filter((entry) => {
        searcher.setMolecule(entry.molecule, entry.index);
        return searcher.isFragmentInMolecule();
      });
    }
    // `entries` and the filtered list are fresh arrays, sorting them in
    // place does not touch the database
    searchResult.sort(
      (a, b) =>
        Math.abs(queryMW - a.properties.mw) -
        Math.abs(queryMW - b.properties.mw),
    );
    return searchResult;
  } finally {
    // the query belongs to the caller: never leave it modified
    if (resetFragment) {
      query.setFragment(false);
    }
  }
}
/**
 * Return every entry of the database ordered by decreasing similarity to
 * the query.
 * The score of an entry is its Tanimoto similarity scaled by 1000000 minus
 * a small penalty proportional to the molecular weight difference, so that
 * the molecular weight only separates entries of equal similarity. An entry
 * with the same idCode as the query always gets the highest score.
 * @param {MoleculesDB} moleculesDB - database to search
 * @param {OCL.Molecule} query
 * @returns {Array} all the entries, most similar first
 */
function similaritySearch(moleculesDB, query) {
  const queryIndex = query.getIndex();
  const queryMW = getMW(query);
  const queryIdCode = query.getIDCode();

  const score = (entry) => {
    if (entry.idCode === queryIdCode) {
      return Number.MAX_SAFE_INTEGER;
    }
    return (
      moleculesDB.OCL.SSSearcherWithIndex.getSimilarityTanimoto(
        queryIndex,
        entry.index,
      ) *
        1000000 -
      Math.abs(queryMW - entry.properties.mw) / 10000
    );
  };

  const ranked = [];
  for (const key in moleculesDB.db) {
    const entry = moleculesDB.db[key];
    ranked.push({ similarity: score(entry), entry });
  }
  ranked.sort((first, second) => second.similarity - first.similarity);
  return ranked.map(({ entry }) => entry);
}
/**
 * Molecular weight of the query, calculated on a compact copy whose
 * fragment flag is switched off so that the query itself is left untouched.
 * @param {OCL.Molecule} query
 * @returns {number} `relativeWeight` of the molecular formula of the copy
 */
function getMW(query) {
  const plainCopy = query.getCompactCopy();
  plainCopy.setFragment(false);
  const { relativeWeight } = plainCopy.getMolecularFormula();
  return relativeWeight;
}
/**
 * Convert database entries into the objects returned by a search.
 * @param {Array} entries - database entries, in the order they must be returned
 * @param {object} [options={}]
 * @param {boolean} [options.flattenResult=true] - when true one result is
 *   created per data item of an entry, otherwise one result per entry with
 *   `data` being the whole data array
 * @param {boolean} [options.keepMolecule=false] - when false the `molecule`
 *   property of the results is `undefined`
 * @param {number} [options.limit=Number.MAX_SAFE_INTEGER] - maximal number of results
 * @returns {Array} objects of the form {data, idCode, properties, molecule}
 */
function processResult(entries, options = {}) {
  const {
    flattenResult = true,
    keepMolecule = false,
    limit = Number.MAX_SAFE_INTEGER,
  } = options;

  const toResult = (entry, data) => ({
    data,
    idCode: entry.idCode,
    properties: entry.properties,
    molecule: keepMolecule ? entry.molecule : undefined,
  });

  const results = [];
  for (const entry of entries) {
    if (!flattenResult) {
      results.push(toResult(entry, entry.data));
      continue;
    }
    for (const item of entry.data) {
      results.push(toResult(entry, item));
    }
  }

  // the list is truncated only after it has been built completely
  if (limit < results.length) results.length = limit;
  return results;
}
/*
 `db` is a plain object that has one property per molecule. The value of
 each property is an entry, an object with the following properties:
 * molecule: an OCL molecule instance
 * index: OCL index used for substructure searching
 * properties: all the calculated properties
 * data: array containing the free data associated with this molecule
 */
class MoleculesDB {
  /**
   * Create an empty database.
   * @param {OCL} [OCL] The openchemlib library
   * @param {object} [options={}]
   * @param {boolean} [options.computeProperties=false]
   */
  constructor(OCL, options = {}) {
    this.OCL = OCL;
    this.db = {};
    this.statistics = null;
    this.computeProperties =
      options.computeProperties === undefined
        ? false
        : options.computeProperties;
    this.searcher = new OCL.SSSearcherWithIndex();
  }

  /**
   * Append the content of a CSV file to the database.
   * @param {text|ArrayBuffer} csv - text file containing the comma separated values
   * @param {object} [options={}]
   * @param {boolean} [options.header=true]
   * @param {boolean} [options.dynamicTyping=true]
   * @param {boolean} [options.skipEmptyLines=true]
   * @param {function} [options.onStep] callback executed after each molecule
   * @returns {Promise} settles once the whole file has been processed
   */
  appendCSV(csv, options) {
    const mergedOptions = {
      computeProperties: this.computeProperties,
      ...options,
    };
    return appendCSV(this, csv, mergedOptions);
  }

  /**
   * Append the content of an SDF file to the database.
   * @param {text|ArrayBuffer} sdf - text file containing the sdf
   * @param {object} [options={}]
   * @param {function} [options.onStep] callback executed after each molecule
   * @returns {Promise} settles once the whole file has been processed
   */
  appendSDF(sdf, options) {
    const mergedOptions = {
      computeProperties: this.computeProperties,
      ...options,
    };
    return appendSDF(this, sdf, mergedOptions);
  }

  /**
   * Add a molecule to the database.
   * @param {OCL.Molecule} molecule
   * @param {object} [data={}]
   * @param {object} [moleculeInfo={}] may contain precalculated index and mw
   */
  pushEntry(molecule, data, moleculeInfo) {
    pushEntry(this, molecule, data, moleculeInfo);
  }

  /**
   * Add an entry to the database from a description of the molecule.
   * @param {object} moleculeInfo - a molecule as a JSON that may contain the following properties: molfile, smiles, idCode, mf, index
   * @param {object} [data={}]
   */
  pushMoleculeInfo(moleculeInfo, data) {
    return pushMoleculeInfo(this, moleculeInfo, data);
  }

  /**
   * Search in the database.
   * Inside the database identical molecules are grouped together.
   * @param {string|OCL.Molecule} [query] smiles, molfile, oclCode or instance of Molecule to look for
   * @param {object} [options={}]
   * @param {string} [options.format='idCode'] - query is in the format 'smiles', 'oclid' or 'molfile'
   * @param {string} [options.mode='substructure'] - search by 'substructure', 'exact' or 'similarity'
   * @param {boolean} [options.flattenResult=true] - the database groups the data of the same product; this option flattens the result
   * @param {boolean} [options.keepMolecule=false] - keep the OCL.Molecule object in the result
   * @param {number} [options.limit=Number.MAX_SAFE_INTEGER] - maximal number of results
   * @return {Array} array of objects of the type {(molecule), idCode, data, properties}
   */
  search(query, options) {
    return search(this, query, options);
  }

  /**
   * Return the entries of the database as an array.
   * @returns {Array} a new array containing every entry
   */
  getDB() {
    return Object.values(this.db);
  }
}
exports.FULL_HOSE_CODE = FULL_HOSE_CODE;
exports.HOSE_CODE_CUT_C_SP3_SP3 = HOSE_CODE_CUT_C_SP3_SP3;
exports.MoleculesDB = MoleculesDB;
exports.addDiastereotopicMissingChirality = addDiastereotopicMissingChirality;

@@ -1069,0 +1485,0 @@ exports.combineSmiles = combineSmiles;

17

package.json
{
"name": "openchemlib-utils",
"version": "1.0.0",
"version": "1.1.0",
"description": "",

@@ -42,16 +42,19 @@ "main": "lib/index.js",

"devDependencies": {
"@babel/plugin-transform-modules-commonjs": "^7.13.8",
"@babel/plugin-transform-modules-commonjs": "^7.14.0",
"cheminfo-build": "^1.1.10",
"eslint": "^7.22.0",
"eslint-config-cheminfo": "^5.2.3",
"eslint": "^7.26.0",
"eslint-config-cheminfo": "^5.2.4",
"esm": "^3.2.25",
"jest": "^26.6.3",
"openchemlib": "7.4.0",
"prettier": "^2.2.1",
"rollup": "^2.42.3"
"prettier": "^2.3.0",
"rollup": "^2.47.0"
},
"dependencies": {
"atom-sorter": "^1.1.9",
"ensure-string": "^0.1.1",
"ml-floyd-warshall": "^1.0.3",
"ml-matrix": "^6.7.0"
"ml-matrix": "^6.8.0",
"papaparse": "^5.3.0",
"sdf-parser": "^4.0.2"
},

@@ -58,0 +61,0 @@ "peerDependencies": {

@@ -21,1 +21,3 @@ export * from './diastereotopic/addDiastereotopicMissingChirality';

export * from './path/getShortestPaths';
export * from './db/MoleculesDB';

@@ -7,2 +7,3 @@ const MAX_R = 10;

* @param {array} [fragments] Array of {smiles,R1,R2,...}
* @param {OCL} [OCL] The openchemlib library
* @param {object} [options={}]

@@ -9,0 +10,0 @@ * @param {function} [options.onStep] method to execute each new molecules

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc