Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

sdf-parser

Package Overview
Dependencies
Maintainers
6
Versions
16
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

sdf-parser - npm Package Compare versions

Comparing version 3.1.0 to 4.0.0

History.md

30

package.json
{
"name": "sdf-parser",
"version": "3.1.0",
"version": "4.0.0",
"description": "SDF parser",

@@ -12,4 +12,5 @@ "main": "./src/index.js",

"eslint-fix": "npm run eslint -- --fix",
"test": "jest && npm run eslint",
"build": "cheminfo build --root SDFParser"
"test": "npm run test-coverage && npm run eslint",
"test-coverage": "jest --coverage",
"test-only": "jest"
},

@@ -37,17 +38,18 @@ "browser": {

"devDependencies": {
"babel-eslint": "^10.0.1",
"callback-stream": "^1.1.0",
"cheminfo-tools": "^1.20.2",
"eslint": "^4.16.0",
"eslint-config-cheminfo": "^1.14.1",
"eslint-plugin-no-only-tests": "^2.0.0",
"jest": "^22.1.4",
"openchemlib": "^5.5.0",
"should": "^13.2.1"
"eslint": "^5.16.0",
"eslint-config-cheminfo": "^1.20.1",
"eslint-plugin-import": "^2.17.3",
"eslint-plugin-jest": "^22.6.4",
"jest": "^24.8.0",
"openchemlib": "^7.1.0"
},
"dependencies": {
"multipipe": "^2.0.1",
"split2": "^2.2.0",
"through2": "^2.0.3",
"through2-filter": "^2.0.0"
"@targos/pumpify": "^2.0.0",
"readable-stream": "^3.4.0",
"split2": "^3.1.1",
"through2": "^3.0.1",
"through2-filter": "^3.0.0"
}
}
# sdf-parser
[![NPM version][npm-image]][npm-url]
[![build status][travis-image]][travis-url]
[![David deps][david-image]][david-url]
[![npm download][download-image]][download-url]
[![NPM version][npm-image]][npm-url]
[![build status][travis-image]][travis-url]
[![npm download][download-image]][download-url]
Parses an SDF file and converts it into an array of objects.

@@ -17,4 +16,4 @@ ## Use of the package

In node script:
```js
// allows to parse a file test.sdf that would be present in the same directory

@@ -29,3 +28,2 @@

console.log(result);
```

@@ -36,25 +34,27 @@

options:
- exclude : array of strings listing the fields to discard
- include : array of strings listing the fields to keep
- modifiers : object mapping a field name to a function that transforms the value of that field during parsing
- filter : function that receives each parsed entry and returns whether it is kept in the result
- mixedEOL : if set to true, mixed end-of-line separators are normalized before parsing
- dynamicTyping : convert fields containing only numbers to numbers (default: true)
## Advanced example with filtering and modifiers
```js
var result = parse(sdf, {
  exclude: ['Number of H-Donors'],
  include: ['Number of H-Donors', 'CLogP', 'Code'],
  modifiers: {
    CLogP: function(field) {
      return {
        low: field * 1 - 0.2,
        high: field * 1 + 0.2
      };
    }
  },
  filter: function(entry) {
    return entry.CLogP && entry.CLogP.low > 4;
  }
});
```

@@ -65,3 +65,3 @@ ```

This API is only available on Node.js.

@@ -78,8 +78,8 @@ ### molecules(options)

```js
const { stream } = require('sdf-parser');
fs.createReadStream('test.sdf')
  .pipe(stream.molecules())
  .on('data', (molecule) => {
    console.log(molecule.molfile);
  });
```

@@ -92,8 +92,8 @@

```js
const { stream } = require('sdf-parser');
fs.createReadStream('test.sdf')
  .pipe(stream.entries())
  .on('data', (entry) => {
    // sdf entry as a string
  });
```

@@ -103,3 +103,3 @@

[MIT](./LICENSE)
[MIT](./LICENSE)

@@ -110,5 +110,3 @@ [npm-image]: https://img.shields.io/npm/v/sdf-parser.svg?style=flat-square

[travis-url]: https://travis-ci.org/cheminfo-js/sdf-parser
[david-image]: https://img.shields.io/david/cheminfo-js/sdf-parser.svg?style=flat-square
[david-url]: https://david-dm.org/cheminfo-js/sdf-parser
[download-image]: https://img.shields.io/npm/dm/sdf-parser.svg?style=flat-square
[download-url]: https://www.npmjs.com/package/sdf-parser
'use strict';
/**
 * Decide whether a field value may keep its label flagged as numeric.
 *
 * Strings qualify when they coerce to a finite number and do not start with
 * a zero followed by another digit (e.g. "007"). Finite numbers, which a
 * modifier may return, qualify as well. Anything else does not.
 *
 * @param {*} value - Field value after an optional modifier was applied.
 * @returns {boolean} true when the value is compatible with a numeric field.
 */
function isNumericValue(value) {
  if (typeof value === 'number') return Number.isFinite(value);
  if (typeof value !== 'string') return false;
  // Global isFinite is intentional here: it coerces the string to a number.
  return isFinite(value) && !/^0[0-9]/.test(value);
}

/**
 * Parse the text of an SDF file into an array of molecule records.
 *
 * Records are separated by a `$$$$` line. Within a record, everything before
 * the first line starting with `>` is stored as `molfile`; each following
 * `> <label>` section becomes a property named after the label.
 *
 * @param {string} sdf - Content of the SDF file.
 * @param {object} [options={}]
 * @param {string[]} [options.include] - If set, only these labels are kept.
 * @param {string[]} [options.exclude] - Labels to discard.
 * @param {function} [options.filter] - Called with each molecule (before
 *   numeric conversion); the molecule is kept only if it returns a truthy value.
 * @param {object} [options.modifiers={}] - Map label -> function receiving the
 *   raw field value; a null/undefined result removes the field.
 * @param {object} [options.forEach={}] - Map label -> value that is copied
 *   onto the statistics entry of that label; not otherwise used here.
 * @param {boolean} [options.dynamicTyping=true] - Convert fields whose values
 *   are all numeric into numbers.
 * @param {boolean} [options.mixedEOL] - Normalize `\r\n` and `\r` to `\n`
 *   before parsing instead of detecting a single separator.
 * @returns {{time: number, molecules: object[], labels: string[], statistics: object[]}}
 *   `time` is the elapsed parsing time measured with Date.now().
 * @throws {TypeError} If `sdf` is not a string.
 */
function parse(sdf, options = {}) {
  const {
    include,
    exclude,
    filter,
    modifiers = {},
    forEach = {},
    dynamicTyping = true
  } = options;

  if (typeof sdf !== 'string') {
    throw new TypeError('Parameter "sdf" must be a string');
  }

  let eol = '\n';
  if (options.mixedEOL) {
    sdf = sdf.replace(/\r\n/g, '\n');
    sdf = sdf.replace(/\r/g, '\n');
  } else {
    // Detect the separator from the first 1000 characters only, so the rest
    // of the parsing can use plain string splitting.
    const header = sdf.slice(0, 1000);
    if (header.indexOf('\r\n') > -1) {
      eol = '\r\n';
    } else if (header.indexOf('\r') > -1) {
      eol = '\r';
    }
  }

  const sdfParts = sdf.split(new RegExp(`${eol}\\$\\$\\$\\$.*${eol}`));
  const molecules = [];
  const labels = {};
  const start = Date.now();

  for (const sdfPart of sdfParts) {
    const parts = sdfPart.split(`${eol}>`);
    // Skip fragments too short to hold a molfile (e.g. trailing remainder).
    if (!(parts.length > 0 && parts[0].length > 5)) continue;

    const molecule = {};
    const currentLabels = [];
    molecule.molfile = parts[0] + eol;

    for (let j = 1; j < parts.length; j++) {
      const lines = parts[j].split(eol);
      const from = lines[0].indexOf('<');
      const to = lines[0].indexOf('>');
      const label = lines[0].substring(from + 1, to);
      currentLabels.push(label);

      if (!labels[label]) {
        labels[label] = {
          counter: 0,
          isNumeric: dynamicTyping,
          keep: false
        };
        if (
          (!exclude || exclude.indexOf(label) === -1) &&
          (!include || include.indexOf(label) > -1)
        ) {
          labels[label].keep = true;
          if (modifiers[label]) labels[label].modifier = modifiers[label];
          if (forEach[label]) labels[label].forEach = forEach[label];
        }
      }

      const info = labels[label];
      if (!info.keep) continue;

      // The last element of `lines` is what follows the final separator of
      // the section, so it is not part of the value.
      for (let k = 1; k < lines.length - 1; k++) {
        if (molecule[label]) {
          molecule[label] += eol + lines[k];
        } else {
          molecule[label] = lines[k];
        }
      }

      if (info.modifier) {
        const modifiedValue = info.modifier(molecule[label]);
        if (modifiedValue === undefined || modifiedValue === null) {
          delete molecule[label];
        } else {
          molecule[label] = modifiedValue;
        }
      }

      // A modifier may have produced a non-string, so the check must not
      // assume string methods are available.
      if (info.isNumeric && !isNumericValue(molecule[label])) {
        info.isNumeric = false;
      }
    }

    if (!filter || filter(molecule)) {
      molecules.push(molecule);
      // only now we can increase the counter
      for (const currentLabel of currentLabels) {
        labels[currentLabel].counter++;
      }
    }
  }

  // all numeric fields should be converted to numbers
  for (const [label, info] of Object.entries(labels)) {
    if (!info.isNumeric) continue;
    info.minValue = Infinity;
    info.maxValue = -Infinity;
    for (const molecule of molecules) {
      const raw = molecule[label];
      if (raw === undefined || raw === '') continue;
      const value = parseFloat(raw);
      molecule[label] = value;
      if (value > info.maxValue) info.maxValue = value;
      if (value < info.minValue) info.minValue = value;
    }
  }

  // we check that a label is in all the records
  const statistics = [];
  for (const [label, info] of Object.entries(labels)) {
    info.always = info.counter === molecules.length;
    info.label = label;
    statistics.push(info);
  }

  return {
    time: Date.now() - start,
    molecules,
    labels: Object.keys(labels),
    statistics
  };
}
module.exports = parse;
'use strict';
const combine = require('multipipe');
const pipeline = require('@targos/pumpify');
const split2 = require('split2');

@@ -10,43 +10,43 @@ const filter = require('through2-filter');

// Factory for through2-filter streams preconfigured for object mode.
const filterStream = filter.bind(null, { objectMode: true });
/**
 * Predicate used to drop chunks that carry no usable content.
 *
 * @param {string} chunk - One piece of text produced by the splitter.
 * @returns {boolean} true when the chunk is longer than one character both
 *   before and after trimming surrounding whitespace.
 */
function filterCb(chunk) {
  return chunk.length > 1 && chunk.trim().length > 1;
}
/**
 * Build an object-mode stream that splits incoming SDF text into entries.
 *
 * The input is split on `$$$$` separator lines, chunks rejected by
 * `filterCb` are dropped, and each remaining entry is emitted as a string
 * with a `$$$$` terminator line appended again.
 *
 * @returns {object} The combined stream returned by `pipeline.obj`.
 */
function entries() {
  return pipeline.obj(
    split2(/\r?\n\${4}.*\r?\n/),
    filterStream(filterCb),
    // A regular function is required: `this` is the transform stream.
    through2({ objectMode: true }, function (value, encoding, callback) {
      // Re-append the terminator using the line ending found in the entry.
      const eol = value.includes('\r\n') ? '\r\n' : '\n';
      this.push(`${value}${eol}$$$$${eol}`);
      callback();
    })
  );
}
/**
 * Build an object-mode stream that emits one parsed molecule per SDF entry.
 *
 * Each entry produced by `entries()` is passed to `parse`. When exactly one
 * molecule results, either the molecule or, if `options.fullResult` is
 * truthy, the whole parse result is pushed downstream. Entries yielding any
 * other number of molecules are dropped.
 *
 * @param {object} [options] - Forwarded to `parse`; `fullResult` is
 *   additionally read here.
 * @returns {object} The combined stream returned by `pipeline.obj`.
 */
function molecules(options) {
  return pipeline.obj(
    entries(),
    // A regular function is required: `this` is the transform stream.
    through2({ objectMode: true }, function (value, encoding, callback) {
      try {
        const parsed = parse(value, options);
        if (parsed.molecules.length === 1) {
          if (options && options.fullResult) {
            this.push(parsed);
          } else {
            this.push(parsed.molecules[0]);
          }
        }
        callback();
      } catch (e) {
        // Hand parse failures to the stream instead of throwing.
        callback(e);
      }
    })
  );
}
module.exports = {
entries,
molecules
entries,
molecules
};
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc