Launch Week Day 3: Introducing Organization Notifications in Socket. Learn More
Socket
Book a Demo | Sign in
Socket

sdf-parser

Package Overview
Dependencies
Maintainers
4
Versions
21
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sdf-parser - npm Package Compare versions

Comparing version
4.0.2
to
5.0.0
+221
lib/index.js
'use strict';
Object.defineProperty(exports, '__esModule', { value: true });
var pipeline = require('pumpify');
var split2 = require('split2');
var through2 = require('through2');
var filter = require('through2-filter');
/**
 * Bundler interop helper: guarantees the returned value exposes a `default`
 * property.
 *
 * @param {*} e - Value obtained from `require`.
 * @returns {*} `e` itself when it is a truthy object that already has a
 *   `default` key, otherwise a new `{ default: e }` wrapper.
 */
function _interopDefaultLegacy (e) {
  const alreadyHasDefault = Boolean(e) && typeof e === 'object' && 'default' in e;
  if (alreadyHasDefault) {
    return e;
  }
  return { 'default': e };
}
var pipeline__default = /*#__PURE__*/_interopDefaultLegacy(pipeline);
var split2__default = /*#__PURE__*/_interopDefaultLegacy(split2);
var through2__default = /*#__PURE__*/_interopDefaultLegacy(through2);
var filter__default = /*#__PURE__*/_interopDefaultLegacy(filter);
/**
 * Computes the `[start, end]` index pairs (usable with `String#substring`) of
 * the entries of `string` that are separated by delimiter lines.
 *
 * A delimiter starts at each occurrence of `substring` and runs up to and
 * including the next `eol`; the text between two delimiters is one entry.
 * When `substring` is not found after the current position, a final pair
 * running to the end of the string is emitted.
 *
 * @param {string} string - Full text to scan.
 * @param {string} substring - Marker that starts a delimiter line.
 * @param {string} eol - End-of-line sequence terminating a delimiter line.
 * @returns {Array<[number, number]>} Boundaries of every entry, in order.
 */
function getEntriesBoundaries(string, substring, eol) {
  const res = [];
  let previous = 0;
  for (;;) {
    const next = string.indexOf(substring, previous);
    if (next === -1) {
      // No further delimiter: whatever remains is the last entry.
      res.push([previous, string.length]);
      break;
    }
    res.push([previous, next]);
    const lineEnd = string.indexOf(eol, next + substring.length);
    if (lineEnd === -1) {
      // The delimiter is the last line and has no trailing EOL. Stop here:
      // using `-1 + eol.length` as the next position would rewind the cursor
      // and loop forever on the same delimiter.
      break;
    }
    previous = lineEnd + eol.length;
  }
  return res;
}
/**
 * Parses the text of an SDF file into molecule records and per-label
 * statistics.
 *
 * The text is cut into entries on `$$$$` delimiter lines. Inside an entry,
 * everything before the first line starting with `>` is the molfile; each
 * following `>` section is a field whose label is the text between `<` and
 * `>` on its first line and whose value is made of the following lines
 * (the last line of the section is not included).
 *
 * @param {string} sdf - Content of the SDF file.
 * @param {object} [options={}]
 * @param {string[]} [options.include] - When given, only these labels are kept.
 * @param {string[]} [options.exclude] - Labels that are never kept (wins over
 *   `include`).
 * @param {function(object): *} [options.filter] - Called with each parsed
 *   molecule; the molecule is kept only when the result is truthy.
 * @param {Object<string, function>} [options.modifiers] - Per-label functions
 *   receiving the raw field value; the returned value replaces the field, and
 *   a `null`/`undefined` result removes the field from the molecule.
 * @param {Object<string, function>} [options.forEach] - Per-label functions
 *   that are stored on the label statistics (not invoked in this function).
 * @param {boolean} [options.dynamicTyping=true] - Convert fields whose values
 *   all look numeric to numbers.
 * @param {boolean} [options.mixedEOL] - Normalize `\r\n` and `\r` to `\n`
 *   before parsing instead of detecting a single end-of-line sequence.
 * @returns {{time: number, molecules: object[], labels: string[], statistics: object[]}}
 *   `time` is the elapsed parsing time in ms, `molecules` the kept records
 *   (each with a `molfile` property plus the kept fields), `labels` every
 *   label encountered and `statistics` one object per label (`label`,
 *   `counter`, `isNumeric`, `keep`, `always`, and `minValue`/`maxValue` for
 *   numeric labels).
 * @throws {TypeError} When `sdf` is not a string.
 */
function parse(sdf, options = {}) {
  const {
    include,
    exclude,
    filter,
    modifiers = {},
    forEach = {},
    dynamicTyping = true,
  } = options;
  if (typeof sdf !== 'string') {
    throw new TypeError('Parameter "sdf" must be a string');
  }
  let eol = '\n';
  if (options.mixedEOL) {
    sdf = sdf.replace(/\r\n/g, '\n');
    sdf = sdf.replace(/\r/g, '\n');
  } else {
    // we will find the delimiter in order to be much faster and not use regular expression
    let header = sdf.slice(0, 1000);
    if (header.indexOf('\r\n') > -1) {
      eol = '\r\n';
    } else if (header.indexOf('\r') > -1) {
      eol = '\r';
    }
  }
  let entriesBoundaries = getEntriesBoundaries(sdf, `${eol}$$$$`, eol);
  let molecules = [];
  // Labels come from the file content, so the dictionary must not inherit
  // anything: with a plain `{}` a label such as `__proto__` or `constructor`
  // would resolve to an inherited member and the counter update below would
  // write to `Object.prototype`.
  let labels = Object.create(null);
  let start = Date.now();
  for (let i = 0; i < entriesBoundaries.length; i++) {
    let sdfPart = sdf.substring(...entriesBoundaries[i]);
    let parts = sdfPart.split(`${eol}>`);
    // Entries whose molfile part is 5 characters or shorter are ignored.
    if (parts.length > 0 && parts[0].length > 5) {
      let molecule = {};
      let currentLabels = [];
      molecule.molfile = parts[0] + eol;
      for (let j = 1; j < parts.length; j++) {
        let lines = parts[j].split(eol);
        let from = lines[0].indexOf('<');
        let to = lines[0].indexOf('>');
        let label = lines[0].substring(from + 1, to);
        currentLabels.push(label);
        if (!labels[label]) {
          labels[label] = {
            counter: 0,
            isNumeric: dynamicTyping,
            keep: false,
          };
          if (
            (!exclude || exclude.indexOf(label) === -1) &&
            (!include || include.indexOf(label) > -1)
          ) {
            labels[label].keep = true;
            if (modifiers[label]) {
              labels[label].modifier = modifiers[label];
            }
            if (forEach[label]) {
              labels[label].forEach = forEach[label];
            }
          }
        }
        if (labels[label].keep) {
          // The first line holds the label and the last line is skipped.
          for (let k = 1; k < lines.length - 1; k++) {
            if (molecule[label]) {
              molecule[label] += eol + lines[k];
            } else {
              molecule[label] = lines[k];
            }
          }
          if (labels[label].modifier) {
            let modifiedValue = labels[label].modifier(molecule[label]);
            if (modifiedValue === undefined || modifiedValue === null) {
              delete molecule[label];
            } else {
              molecule[label] = modifiedValue;
            }
          }
          if (labels[label].isNumeric) {
            let value = molecule[label];
            // Only raw strings are candidates for numeric conversion. A
            // modifier may have produced a number, boolean or object, on
            // which calling `.match` would throw.
            if (
              typeof value !== 'string' ||
              !isFinite(value) ||
              value.match(/^0[0-9]/)
            ) {
              labels[label].isNumeric = false;
            }
          }
        }
      }
      if (!filter || filter(molecule)) {
        molecules.push(molecule);
        // only now we can increase the counter
        for (let j = 0; j < currentLabels.length; j++) {
          labels[currentLabels[j]].counter++;
        }
      }
    }
  }
  // all numeric fields should be converted to numbers
  for (let label in labels) {
    let currentLabel = labels[label];
    if (currentLabel.isNumeric) {
      currentLabel.minValue = Infinity;
      currentLabel.maxValue = -Infinity;
      for (let j = 0; j < molecules.length; j++) {
        if (molecules[j][label]) {
          let value = parseFloat(molecules[j][label]);
          molecules[j][label] = value;
          if (value > currentLabel.maxValue) {
            currentLabel.maxValue = value;
          }
          if (value < currentLabel.minValue) {
            currentLabel.minValue = value;
          }
        }
      }
    }
  }
  // we check that a label is in all the records
  for (let key in labels) {
    if (labels[key].counter === molecules.length) {
      labels[key].always = true;
    } else {
      labels[key].always = false;
    }
  }
  let statistics = [];
  for (let key in labels) {
    let statistic = labels[key];
    statistic.label = key;
    statistics.push(statistic);
  }
  return {
    time: Date.now() - start,
    molecules: molecules,
    labels: Object.keys(labels),
    statistics: statistics,
  };
}
// Factory for filter streams: the `through2-filter` default export with
// `{ objectMode: true }` pre-bound as its first argument.
const filterStream = filter__default["default"].bind(null, { objectMode: true });
/**
 * Predicate telling whether a chunk carries content.
 *
 * @param {string} chunk - Piece of text to inspect.
 * @returns {boolean} `true` when the chunk is longer than one character both
 *   as-is and once surrounding whitespace is trimmed.
 */
function filterCb(chunk) {
  if (!(chunk.length > 1)) {
    return false;
  }
  const trimmed = chunk.trim();
  return trimmed.length > 1;
}
/**
 * Builds an object-mode stream pipeline that cuts incoming SDF text into
 * entries.
 *
 * The text is split on `$$$$` delimiter lines, chunks rejected by `filterCb`
 * are dropped, and every remaining chunk is pushed downstream with an
 * end-of-line, `$$$$` and another end-of-line appended. `\r\n` is used when
 * the chunk contains it, `\n` otherwise.
 *
 * @returns {object} The combined stream created by `pumpify.obj`.
 */
function entries() {
  const splitOnDelimiter = split2__default["default"](/\r?\n\${4}.*\r?\n/);
  const dropBlankChunks = filterStream(filterCb);
  const restoreDelimiter = through2__default["default"](
    { objectMode: true },
    function process(value, encoding, callback) {
      let eol = '\n';
      if (value.includes('\r\n')) {
        eol = '\r\n';
      }
      this.push(`${value + eol}$$$$${eol}`);
      callback();
    },
  );
  return pipeline__default["default"].obj(
    splitOnDelimiter,
    dropBlankChunks,
    restoreDelimiter,
  );
}
/**
 * Builds an object-mode stream pipeline that turns SDF text into parsed
 * molecules.
 *
 * Each entry produced by `entries()` is run through `parse` with the given
 * options. When exactly one molecule comes out, either that molecule or, if
 * `options.fullResult` is truthy, the whole parse result is pushed
 * downstream. Anything thrown while handling an entry is passed to the
 * stream callback.
 *
 * @param {object} [options] - Options forwarded to `parse`; `fullResult`
 *   selects what is emitted.
 * @returns {object} The combined stream created by `pumpify.obj`.
 */
function molecules(options) {
  const parseEntry = through2__default["default"](
    { objectMode: true },
    function process(value, encoding, callback) {
      try {
        const parsed = parse(value, options);
        const hasSingleMolecule = parsed.molecules.length === 1;
        if (hasSingleMolecule) {
          const wantsFullResult = options && options.fullResult;
          this.push(wantsFullResult ? parsed : parsed.molecules[0]);
        }
        callback();
      } catch (error) {
        callback(error);
      }
    },
  );
  return pipeline__default["default"].obj(entries(), parseEntry);
}
exports.entries = entries;
exports.molecules = molecules;
exports.parse = parse;
import fs from 'fs';
import { parse } from '..';
let sdf = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');
// Exercises `parse` with include/exclude lists, a modifier and a filter
// combined on the `test.sdf` fixture.
describe('SDF Parser options', () => {
  // 'Number of H-Donors' is listed in both `exclude` and `include`; only
  // 'CLogP' and 'Code' are expected to end up on the molecules.
  let result = parse(sdf, {
    exclude: ['Number of H-Donors'],
    include: ['Number of H-Donors', 'CLogP', 'Code'],
    modifiers: {
      CLogP: (field) => {
        return {
          low: field * 1 - 0.2,
          high: field * 1 + 0.2,
        };
      },
    },
    filter: (entry) => {
      return entry.CLogP && entry.CLogP.low > 4;
    },
  });
  it('Check statistics', () => {
    expect(result.statistics[0].counter).toBe(43);
    expect(result.statistics[0].isNumeric).toBe(false);
    expect(result.statistics[0].label).toBe('Code');
    expect(result.statistics[0].always).toBe(true);
    expect(result.statistics[4].counter).toBe(43);
    expect(result.statistics[4].isNumeric).toBe(false);
    expect(result.statistics[4].label).toBe('CLogP');
    expect(result.statistics[4].always).toBe(true);
  });
  it('Check molecules', () => {
    expect(result.molecules).toHaveLength(43);
    let molecule = result.molecules[0];
    // molfile + Code + CLogP
    expect(Object.keys(molecule)).toHaveLength(3);
    expect(molecule.Code).toBe('0100380851');
    // The second argument of `toBeCloseTo` is a number of decimal digits,
    // not a tolerance: 4 digits checks agreement to within 0.00005.
    expect(molecule.CLogP.low).toBeCloseTo(4.8, 4);
    expect(molecule.CLogP.high).toBeCloseTo(5.2, 4);
    expect(molecule.molfile.split('\n')).toHaveLength(56);
  });
  it('should throw with non-string argument', () => {
    expect(() => {
      parse();
    }).toThrow(TypeError);
    expect(() => {
      parse(42);
    }).toThrow(TypeError);
    expect(() => {
      parse({});
    }).toThrow(TypeError);
  });
});
import fs from 'fs';
import { parse } from '..';
let sdf = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');
// A modifier that returns `undefined` makes the CLogP-based filter reject
// every entry, so no molecule is expected in the result.
describe('SDF Parser options and undefined', () => {
  const dropClogP = () => undefined;
  const hasHighClogP = (entry) => entry.CLogP && entry.CLogP.low > 4;
  const parsed = parse(sdf, {
    exclude: ['Number of H-Donors'],
    include: ['Number of H-Donors', 'CLogP', 'Code'],
    modifiers: { CLogP: dropClogP },
    filter: hasHighClogP,
  });
  it('Check molecules', () => {
    expect(parsed.molecules).toHaveLength(0);
  });
});
import fs from 'fs';
import { getEntriesBoundaries } from '../getEntriesBoundaries';
let sdf0 = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');
let sdf1 = fs.readFileSync(`${__dirname}/test1.sdf`, 'utf-8');
let sdf2 = fs.readFileSync(`${__dirname}/test2.sdf`, 'utf-8');
// For every fixture, the slices described by `getEntriesBoundaries` must be
// the same as the pieces produced by splitting on the delimiter regex.
for (const sdf of [sdf0, sdf1, sdf2]) {
  // Detect the end-of-line sequence from the first 1000 characters.
  const header = sdf.substr(0, 1000);
  let eol = '\n';
  if (header.includes('\r\n')) {
    eol = '\r\n';
  } else if (header.includes('\r')) {
    eol = '\r';
  }
  test('Split should match regex behavior', () => {
    const regexParts = sdf.split(new RegExp(`${eol}\\$\\$\\$\\$.*${eol}`));
    const boundaries = getEntriesBoundaries(sdf, `${eol}$$$$`, eol);
    const slicedParts = boundaries.map(([from, to]) => sdf.substring(from, to));
    expect(regexParts).toStrictEqual(slicedParts);
  });
}
import fs from 'fs';
import { parse } from '..';
let sdf = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');
let sdf1 = fs.readFileSync(`${__dirname}/test1.sdf`, 'utf-8');
// Default-options parsing of the `test.sdf` fixture.
describe('SDF Parser', () => {
  const parsed = parse(sdf);
  it('Check statistics', () => {
    const stats = parsed.statistics;
    expect(stats[0].counter).toBe(128);
    expect(stats[0].isNumeric).toBe(false);
    expect(stats[0].label).toBe('Code');
    expect(stats[1].counter).toBe(128);
    expect(stats[1].minValue).toBe(0);
    expect(stats[1].maxValue).toBe(5);
    expect(stats[1].isNumeric).toBe(true);
    expect(stats[1].label).toBe('Number of H-Donors');
    expect(stats[0].always).toBe(true);
    expect(stats[4].always).toBe(false);
  });
  it('Check molecules', () => {
    const [first] = parsed.molecules;
    expect(first.Code).toContain('0100380824');
    expect(first.CLogP).toBe(2.7);
    expect(first.molfile.split('\n')).toHaveLength(37);
  });
  it('should throw with non-string argument', () => {
    // Missing, numeric and object arguments must all be rejected.
    expect(() => parse()).toThrow(TypeError);
    expect(() => parse(42)).toThrow(TypeError);
    expect(() => parse({})).toThrow(TypeError);
  });
});
// With `dynamicTyping` disabled, no label is flagged numeric and field
// values stay as the raw strings found in the file.
describe('SDF Parser no dynamicTyping', () => {
  const parsed = parse(sdf, { dynamicTyping: false });
  it('Check statistics', () => {
    const stats = parsed.statistics;
    expect(stats[0].counter).toBe(128);
    expect(stats[0].isNumeric).toBe(false);
    expect(stats[0].label).toBe('Code');
    expect(stats[1].counter).toBe(128);
    expect(stats[1].minValue).toBeUndefined();
    expect(stats[1].maxValue).toBeUndefined();
    expect(stats[1].isNumeric).toBe(false);
    expect(stats[1].label).toBe('Number of H-Donors');
    expect(stats[0].always).toBe(true);
    expect(stats[4].always).toBe(false);
  });
  it('Check molecules', () => {
    const [first] = parsed.molecules;
    expect(typeof first.Code).toBe('string');
    expect(typeof first.CLogP).toBe('string');
    expect(first.CLogP).toBe('2.700000000000000e+000');
    expect(first.molfile.split('\n')).toHaveLength(37);
  });
});
// The `test1.sdf` fixture is expected to yield exactly one molecule.
describe('SDF Parser one molecule', () => {
  const { molecules: parsedMolecules } = parse(sdf1);
  it('Check statistics', () => {
    expect(parsedMolecules).toHaveLength(1);
  });
});
import fs from 'fs';
import { parse } from '..';
// Parses `test2.sdf` with every carriage return removed, using the
// `mixedEOL` option.
describe('SDF Parser of non well formatted file', () => {
  const raw = fs.readFileSync(`${__dirname}/test2.sdf`, 'utf-8');
  const withoutCarriageReturns = raw.replace(/\r/g, '');
  const parsed = parse(withoutCarriageReturns, { mixedEOL: true });
  it('Check molecules', () => {
    expect(parsed.molecules).toHaveLength(7);
  });
});
import fs from 'fs';
import callbackStream from 'callback-stream';
import OCL from 'openchemlib/minimal';
import { entries, molecules } from '..';
const cbStream = callbackStream.bind(null, { objectMode: true });
// Streaming API tests: `entries()` and `molecules()` fed from `test.sdf`.
describe('stream', () => {
  /**
   * Pipes `stream` into a callback-stream sink and resolves with what the
   * sink callback received.
   *
   * Assertions are made by the caller after awaiting, not inside the stream
   * callback: an `expect` failing inside the callback would be thrown outside
   * the promise chain and show up as an uncaught exception or a timeout
   * instead of a regular test failure.
   *
   * @param {object} stream - Readable stream to drain.
   * @returns {Promise<{err: *, data: *}>} Arguments given to the sink callback.
   */
  function collect(stream) {
    return new Promise((resolve) => {
      stream.pipe(
        cbStream((err, data) => {
          resolve({ err, data });
        }),
      );
    });
  }

  it('entries', async () => {
    const { err, data } = await collect(
      fs.createReadStream(`${__dirname}/test.sdf`).pipe(entries()),
    );
    expect(err).toBeNull();
    expect(data).toHaveLength(128);
    expect(data[0]).toContain('-ISIS- 04231216572D');
    const mol = OCL.Molecule.fromMolfile(data[5]);
    expect(mol.toMolfile()).toContain(
      '17 18 0 0 0 0 0 0 0 0999 V2000',
    );
  });

  it('molecules', async () => {
    const { err, data } = await collect(
      fs.createReadStream(`${__dirname}/test.sdf`).pipe(molecules()),
    );
    expect(err).toBeNull();
    expect(data).toHaveLength(128);
    expect(data[0]).toMatchObject({
      Code: '0100380824',
      CLogP: 2.7,
    });
    expect(data[0].molfile).toContain('-ISIS- 04231216572D');
  });

  it('molecules - full result', async () => {
    const { err, data } = await collect(
      fs
        .createReadStream(`${__dirname}/test.sdf`)
        .pipe(molecules({ fullResult: true })),
    );
    expect(err).toBeNull();
    expect(data).toHaveLength(128);
    expect(data[0]).toMatchObject({
      labels: [
        'Code',
        'Number of H-Donors',
        'Number of H-Acceptors',
        'Number of Rotatable bonds',
        'CLogP',
      ],
    });
    expect(data[0].molecules).toHaveLength(1);
  });

  it('molecules with filter', async () => {
    const { err, data } = await collect(
      fs.createReadStream(`${__dirname}/test.sdf`).pipe(
        molecules({
          filter: (entry) => entry.Code === '0100380869',
        }),
      ),
    );
    expect(err).toBeNull();
    expect(data).toHaveLength(1);
  });

  it('async iteration', async () => {
    const stream = fs
      .createReadStream(`${__dirname}/test.sdf`)
      .pipe(molecules());
    let count = 0;
    for await (const molecule of stream) {
      count++;
      expect(molecule.molfile.toString()).toContain('0999 V2000');
    }
    expect(count).toBe(128);
  });
});

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

/**
 * Computes the `[start, end]` index pairs (usable with `String#substring`) of
 * the entries of `string` that are separated by delimiter lines.
 *
 * A delimiter starts at each occurrence of `substring` and runs up to and
 * including the next `eol`; the text between two delimiters is one entry.
 * When `substring` is not found after the current position, a final pair
 * running to the end of the string is emitted.
 *
 * @param {string} string - Full text to scan.
 * @param {string} substring - Marker that starts a delimiter line.
 * @param {string} eol - End-of-line sequence terminating a delimiter line.
 * @returns {Array<[number, number]>} Boundaries of every entry, in order.
 */
export function getEntriesBoundaries(string, substring, eol) {
  const res = [];
  let previous = 0;
  for (;;) {
    const next = string.indexOf(substring, previous);
    if (next === -1) {
      // No further delimiter: whatever remains is the last entry.
      res.push([previous, string.length]);
      break;
    }
    res.push([previous, next]);
    const lineEnd = string.indexOf(eol, next + substring.length);
    if (lineEnd === -1) {
      // The delimiter is the last line and has no trailing EOL. Stop here:
      // using `-1 + eol.length` as the next position would rewind the cursor
      // and loop forever on the same delimiter.
      break;
    }
    previous = lineEnd + eol.length;
  }
  return res;
}
+23
-16
{
"name": "sdf-parser",
"version": "4.0.2",
"version": "5.0.0",
"description": "SDF parser",
"main": "./src/index.js",
"main": "lib/index.js",
"module": "src/index.js",
"files": [
"lib",
"src"
],
"scripts": {
"eslint": "eslint src __tests__",
"build": "npm run compile && cheminfo-build --root SDFParser",
"compile": "rollup -c",
"eslint": "eslint src",
"eslint-fix": "npm run eslint -- --fix",
"prepack": "npm run compile",
"prettier": "prettier --check src",
"prettier-write": "prettier --write src",
"test": "npm run test-coverage && npm run eslint",

@@ -21,3 +28,3 @@ "test-coverage": "jest --coverage",

"type": "git",
"url": "https://github.com/cheminfo-js/sdf-parser.git"
"url": "https://github.com/cheminfo/sdf-parser.git"
},

@@ -29,2 +36,3 @@ "keywords": [

"v2000",
"v3000",
"mdl"

@@ -35,23 +43,22 @@ ],

"bugs": {
"url": "https://github.com/cheminfo-js/sdf-parser/issues"
"url": "https://github.com/cheminfo/sdf-parser/issues"
},
"homepage": "https://github.com/cheminfo-js/sdf-parser",
"homepage": "https://github.com/cheminfo/sdf-parser",
"devDependencies": {
"@babel/plugin-transform-modules-commonjs": "^7.15.4",
"babel-eslint": "^10.1.0",
"callback-stream": "^1.1.0",
"eslint": "^7.3.1",
"eslint-config-cheminfo": "^4.0.0",
"eslint-plugin-import": "^2.22.0",
"eslint-plugin-jest": "^23.17.1",
"eslint-plugin-prettier": "^3.1.4",
"jest": "^26.1.0",
"openchemlib": "^7.2.3",
"prettier": "^2.0.5"
"cheminfo-build": "^1.1.11",
"eslint": "^7.32.0",
"eslint-config-cheminfo": "^6.0.1",
"jest": "^27.2.5",
"openchemlib": "^7.4.3",
"prettier": "^2.4.1"
},
"dependencies": {
"pumpify": "^2.0.1",
"split2": "^3.1.1",
"through2": "^3.0.2",
"split2": "^3.2.2",
"through2": "^4.0.2",
"through2-filter": "^3.0.0"
}
}

@@ -20,3 +20,3 @@ # sdf-parser

var parse = require('sdf-parser');
var { parse } = require('sdf-parser');

@@ -48,12 +48,12 @@ var fs = require('fs');

modifiers: {
CLogP: function(field) {
CLogP: function (field) {
return {
low: field * 1 - 0.2,
high: field * 1 + 0.2
high: field * 1 + 0.2,
};
}
},
},
filter: function(entry) {
filter: (entry) => {
return entry.CLogP && entry.CLogP.low > 4;
}
},
});

@@ -103,5 +103,5 @@ ```

[npm-url]: https://www.npmjs.com/package/sdf-parser
[travis-image]: https://img.shields.io/travis/cheminfo-js/sdf-parser/master.svg?style=flat-square
[travis-url]: https://travis-ci.org/cheminfo-js/sdf-parser
[travis-image]: https://img.shields.io/travis/cheminfo/sdf-parser/master.svg?style=flat-square
[travis-url]: https://travis-ci.org/cheminfo/sdf-parser
[download-image]: https://img.shields.io/npm/dm/sdf-parser.svg?style=flat-square
[download-url]: https://www.npmjs.com/package/sdf-parser

@@ -1,7 +0,2 @@

'use strict';
const parse = require('./parse');
const stream = require('./stream');
module.exports = parse;
parse.stream = stream;
export * from './parse';
export * from './stream';

@@ -1,4 +0,4 @@

'use strict';
import { getEntriesBoundaries } from './getEntriesBoundaries';
function parse(sdf, options = {}) {
export function parse(sdf, options = {}) {
const {

@@ -31,3 +31,3 @@ include,

let sdfParts = sdf.split(new RegExp(`${eol}\\$\\$\\$\\$.*${eol}`));
let entriesBoundaries = getEntriesBoundaries(sdf, `${eol}$$$$`, eol);
let molecules = [];

@@ -38,4 +38,4 @@ let labels = {};

for (let i = 0; i < sdfParts.length; i++) {
let sdfPart = sdfParts[i];
for (let i = 0; i < entriesBoundaries.length; i++) {
let sdfPart = sdf.substring(...entriesBoundaries[i]);
let parts = sdfPart.split(`${eol}>`);

@@ -63,4 +63,8 @@ if (parts.length > 0 && parts[0].length > 5) {

labels[label].keep = true;
if (modifiers[label]) labels[label].modifier = modifiers[label];
if (forEach[label]) labels[label].forEach = forEach[label];
if (modifiers[label]) {
labels[label].modifier = modifiers[label];
}
if (forEach[label]) {
labels[label].forEach = forEach[label];
}
}

@@ -114,4 +118,8 @@ }

molecules[j][label] = value;
if (value > currentLabel.maxValue) currentLabel.maxValue = value;
if (value < currentLabel.minValue) currentLabel.minValue = value;
if (value > currentLabel.maxValue) {
currentLabel.maxValue = value;
}
if (value < currentLabel.minValue) {
currentLabel.minValue = value;
}
}

@@ -145,3 +153,1 @@ }

}
module.exports = parse;

@@ -1,3 +0,3 @@

'use strict';
const empty = {};
module.exports = {};
export default empty;

@@ -1,10 +0,8 @@

'use strict';
import pipeline from 'pumpify';
import split2 from 'split2';
import through2 from 'through2';
import filter from 'through2-filter';
const pipeline = require('pumpify');
const split2 = require('split2');
const through2 = require('through2');
const filter = require('through2-filter');
import { parse } from './parse';
const parse = require('./parse');
const filterStream = filter.bind(null, { objectMode: true });

@@ -15,7 +13,7 @@ function filterCb(chunk) {

function entries() {
export function entries() {
return pipeline.obj(
split2(/\r?\n\${4}.*\r?\n/),
filterStream(filterCb),
through2({ objectMode: true }, function (value, encoding, callback) {
through2({ objectMode: true }, function process(value, encoding, callback) {
const eol = value.includes('\r\n') ? '\r\n' : '\n';

@@ -28,6 +26,6 @@ this.push(`${value + eol}$$$$${eol}`);

function molecules(options) {
export function molecules(options) {
return pipeline.obj(
entries(),
through2({ objectMode: true }, function (value, encoding, callback) {
through2({ objectMode: true }, function process(value, encoding, callback) {
try {

@@ -49,6 +47,1 @@ const parsed = parse(value, options);

}
module.exports = {
entries,
molecules,
};
## [4.0.2](https://github.com/cheminfo-js/sdf-parser/compare/v4.0.1...v4.0.2) (2020-06-27)
## [4.0.1](https://github.com/cheminfo-js/sdf-parser/compare/v4.0.0...v4.0.1) (2019-06-12)
### Bug Fixes
* back to pumpify ([387c3eb](https://github.com/cheminfo-js/sdf-parser/commit/387c3eb))
# [4.0.0](https://github.com/cheminfo-js/sdf-parser/compare/v3.1.0...v4.0.0) (2019-06-12)
### Bug Fixes
* use pumpify instead of multipipe ([aed47fd](https://github.com/cheminfo-js/sdf-parser/commit/aed47fd))
### chore
* remove bower.json ([2cc05c5](https://github.com/cheminfo-js/sdf-parser/commit/2cc05c5))
* remove dist directory ([eba83f7](https://github.com/cheminfo-js/sdf-parser/commit/eba83f7))
### Features
* add support for stream async iteration ([f41105e](https://github.com/cheminfo-js/sdf-parser/commit/f41105e))
### BREAKING CHANGES
* The "dist" directory is no longer built and published.
* Bower is no longer supported.
# [3.1.0](https://github.com/cheminfo-js/sdf-parser/compare/v3.0.1...v3.1.0) (2018-01-26)
## [3.0.1](https://github.com/cheminfo-js/sdf-parser/compare/v3.0.0...v3.0.1) (2018-01-06)
# [3.0.0](https://github.com/cheminfo-js/sdf-parser/compare/v2.3.1...v3.0.0) (2018-01-05)
### Features
* add stream functions ([ac7ed88](https://github.com/cheminfo-js/sdf-parser/commit/ac7ed88))
## [2.3.1](https://github.com/cheminfo-js/sdf-parser/compare/v2.3.0...v2.3.1) (2017-07-04)
# [2.3.0](https://github.com/cheminfo-js/sdf-parser/compare/v2.2.2...v2.3.0) (2017-07-04)
## [2.2.2](https://github.com/cheminfo-js/sdf-parser/compare/v2.2.1...v2.2.2) (2016-06-23)
## [2.2.1](https://github.com/cheminfo-js/sdf-parser/compare/v2.2.0...v2.2.1) (2016-06-07)
# [2.2.0](https://github.com/cheminfo-js/sdf-parser/compare/v2.1.1...v2.2.0) (2016-06-07)
## [2.1.1](https://github.com/cheminfo-js/sdf-parser/compare/v2.1.0...v2.1.1) (2016-06-06)
# [2.1.0](https://github.com/cheminfo-js/sdf-parser/compare/v2.0.1...v2.1.0) (2016-06-06)
## [2.0.1](https://github.com/cheminfo-js/sdf-parser/compare/v2.0.0...v2.0.1) (2016-05-21)
# [2.0.0](https://github.com/cheminfo-js/sdf-parser/compare/v1.0.1...v2.0.0) (2016-05-21)
## [1.0.1](https://github.com/cheminfo-js/sdf-parser/compare/v1.0.0...v1.0.1) (2015-06-10)
# 1.0.0 (2015-02-13)