sdf-parser
Advanced tools
Comparing version 5.0.2 to 6.0.0
271
lib/index.js
@@ -6,14 +6,5 @@ 'use strict'; | ||
var ensureString = require('ensure-string'); | ||
var pipeline = require('pumpify'); | ||
var split2 = require('split2'); | ||
var through2 = require('through2'); | ||
var filter = require('through2-filter'); | ||
var readline = require('readline'); | ||
var dynamicTyping = require('dynamic-typing'); | ||
// Interop helper: returns `e` unchanged when it is an object that already has a `default` property, otherwise wraps it as `{ default: e }` so callers can always read `.default`.
function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; } | ||
var pipeline__default = /*#__PURE__*/_interopDefaultLegacy(pipeline); | ||
var split2__default = /*#__PURE__*/_interopDefaultLegacy(split2); | ||
var through2__default = /*#__PURE__*/_interopDefaultLegacy(through2); | ||
var filter__default = /*#__PURE__*/_interopDefaultLegacy(filter); | ||
function getEntriesBoundaries(string, substring, eol) { | ||
@@ -41,21 +32,75 @@ const res = []; | ||
/**
 * Parse a single SDF entry: the molfile followed by its `> <label>` data fields.
 * Besides building the molecule, it updates the per-label bookkeeping that is
 * shared between all the entries of a file.
 *
 * @param {string} sdfPart Text of one SDF entry
 * @param {object} labels Per-label state shared between calls (mutated). Each
 *   entry holds `counter`, `isNumeric`, `keep` and, when configured for that
 *   label, `modifier` and `forEach` (only stored here, never called).
 * @param {string[]} currentLabels Receives (mutated) every label found in this
 *   entry, including the ones that are not kept
 * @param {object} options
 * @param {string} options.eol End of line sequence used to split the entry
 * @param {string[]} [options.include] List of fields to include
 * @param {string[]} [options.exclude] List of fields to exclude
 * @param {object} options.modifiers Callbacks applied on the raw value of a field
 * @param {object} options.forEach Callbacks copied to the label state
 * @param {boolean} [options.dynamicTyping] Initial value of `isNumeric` for a new label
 * @returns {object|undefined} Object with `molfile` and one property per kept
 *   field, or undefined when the text before the first field has 5 characters
 *   or fewer
 */
function getMolecule$1(sdfPart, labels, currentLabels, options) {
  const { eol } = options;
  const parts = sdfPart.split(`${eol}>`);
  if (parts.length === 0 || parts[0].length <= 5) return;

  const molecule = { molfile: parts[0] + eol };

  for (let j = 1; j < parts.length; j++) {
    const lines = parts[j].split(eol);
    // the leading '>' was consumed by the split, so the first '>' closes the label
    const header = lines[0];
    const label = header.substring(
      header.indexOf('<') + 1,
      header.indexOf('>'),
    );
    currentLabels.push(label);

    // NOTE(review): this lookup is by truthiness, so with a plain object a
    // field named like an inherited property (e.g. `constructor`) would skip
    // the initialization - confirm whether callers need a guard.
    if (!labels[label]) {
      labels[label] = {
        counter: 0,
        isNumeric: options.dynamicTyping,
        keep: false,
      };
      const isExcluded =
        options.exclude && options.exclude.indexOf(label) !== -1;
      const isIncluded =
        !options.include || options.include.indexOf(label) > -1;
      if (!isExcluded && isIncluded) {
        labels[label].keep = true;
        if (options.modifiers[label]) {
          labels[label].modifier = options.modifiers[label];
        }
        if (options.forEach[label]) {
          labels[label].forEach = options.forEach[label];
        }
      }
    }

    const state = labels[label];
    if (!state.keep) continue;

    // the last element of `lines` is skipped: it is the blank line closing the field
    for (let k = 1; k < lines.length - 1; k++) {
      if (molecule[label]) {
        molecule[label] += eol + lines[k];
      } else {
        molecule[label] = lines[k];
      }
    }

    if (state.modifier) {
      const modifiedValue = state.modifier(molecule[label]);
      if (modifiedValue === undefined || modifiedValue === null) {
        delete molecule[label];
      } else {
        molecule[label] = modifiedValue;
      }
    }

    if (state.isNumeric) {
      // A modifier may return something else than a string. Calling `.match`
      // on such a value used to throw, so the leading zero test (e.g. '007'
      // must stay text) is only applied to strings.
      const value = molecule[label];
      let numeric;
      if (typeof value === 'string') {
        numeric = Number.isFinite(Number(value)) && !/^0[0-9]/.test(value);
      } else {
        numeric = typeof value === 'number' && Number.isFinite(value);
      }
      if (!numeric) {
        state.isNumeric = false;
      }
    }
  }
  return molecule;
}
/** | ||
* Parse a SDF file | ||
* @param {string|ArrayBuffer|Uint8Array} sdf SDF file to parse | ||
* @param {any} [options={}] | ||
* @param {array<string>} [options.include] List of fields to include | ||
* @param {array<string>} [options.exclude] List of fields to exclude | ||
* @param {object} [options={}] | ||
* @param {string[]} [options.include] List of fields to include | ||
* @param {string[]} [options.exclude] List of fields to exclude | ||
* @param {Function} [options.filter] Callback allowing to filter the molecules | ||
* @param {boolean} [options.dynamicTyping] Dynamically type the data | ||
* @param {object} [options.modifiers] Object containing callbacks to apply on some specific fields | ||
* @param {boolean} [options.mixedEOL=false] Set to true if you know there is a mixture between \r\n and \n | ||
* @param {string} [options.eol] Specify the end of line character. Default will be the one found in the file | ||
*/ | ||
function parse(sdf, options = {}) { | ||
const { | ||
include, | ||
exclude, | ||
filter, | ||
modifiers = {}, | ||
forEach = {}, | ||
dynamicTyping = true, | ||
} = options; | ||
options = { ...options }; | ||
if (options.modifiers === undefined) options.modifiers = {}; | ||
if (options.forEach === undefined) options.forEach = {}; | ||
if (options.dynamicTyping === undefined) options.dynamicTyping = true; | ||
@@ -67,17 +112,23 @@ sdf = ensureString.ensureString(sdf); | ||
let eol = '\n'; | ||
if (options.mixedEOL) { | ||
sdf = sdf.replace(/\r\n/g, '\n'); | ||
sdf = sdf.replace(/\r/g, '\n'); | ||
} else { | ||
// we will find the delimiter in order to be much faster and not use regular expression | ||
let header = sdf.substr(0, 1000); | ||
if (header.indexOf('\r\n') > -1) { | ||
eol = '\r\n'; | ||
} else if (header.indexOf('\r') > -1) { | ||
eol = '\r'; | ||
if (options.eol === undefined) { | ||
options.eol = '\n'; | ||
if (options.mixedEOL) { | ||
sdf = sdf.replace(/\r\n/g, '\n'); | ||
sdf = sdf.replace(/\r/g, '\n'); | ||
} else { | ||
// we will find the delimiter in order to be much faster and not use regular expression | ||
let header = sdf.substr(0, 1000); | ||
if (header.indexOf('\r\n') > -1) { | ||
options.eol = '\r\n'; | ||
} else if (header.indexOf('\r') > -1) { | ||
options.eol = '\r'; | ||
} | ||
} | ||
} | ||
let entriesBoundaries = getEntriesBoundaries(sdf, `${eol}$$$$`, eol); | ||
let entriesBoundaries = getEntriesBoundaries( | ||
sdf, | ||
`${options.eol}$$$$`, | ||
options.eol, | ||
); | ||
let molecules = []; | ||
@@ -90,68 +141,14 @@ let labels = {}; | ||
let sdfPart = sdf.substring(...entriesBoundaries[i]); | ||
let parts = sdfPart.split(`${eol}>`); | ||
if (parts.length > 0 && parts[0].length > 5) { | ||
let molecule = {}; | ||
let currentLabels = []; | ||
molecule.molfile = parts[0] + eol; | ||
for (let j = 1; j < parts.length; j++) { | ||
let lines = parts[j].split(eol); | ||
let from = lines[0].indexOf('<'); | ||
let to = lines[0].indexOf('>'); | ||
let label = lines[0].substring(from + 1, to); | ||
currentLabels.push(label); | ||
if (!labels[label]) { | ||
labels[label] = { | ||
counter: 0, | ||
isNumeric: dynamicTyping, | ||
keep: false, | ||
}; | ||
if ( | ||
(!exclude || exclude.indexOf(label) === -1) && | ||
(!include || include.indexOf(label) > -1) | ||
) { | ||
labels[label].keep = true; | ||
if (modifiers[label]) { | ||
labels[label].modifier = modifiers[label]; | ||
} | ||
if (forEach[label]) { | ||
labels[label].forEach = forEach[label]; | ||
} | ||
} | ||
} | ||
if (labels[label].keep) { | ||
for (let k = 1; k < lines.length - 1; k++) { | ||
if (molecule[label]) { | ||
molecule[label] += eol + lines[k]; | ||
} else { | ||
molecule[label] = lines[k]; | ||
} | ||
} | ||
if (labels[label].modifier) { | ||
let modifiedValue = labels[label].modifier(molecule[label]); | ||
if (modifiedValue === undefined || modifiedValue === null) { | ||
delete molecule[label]; | ||
} else { | ||
molecule[label] = modifiedValue; | ||
} | ||
} | ||
if (labels[label].isNumeric) { | ||
if ( | ||
!isFinite(molecule[label]) || | ||
molecule[label].match(/^0[0-9]/) | ||
) { | ||
labels[label].isNumeric = false; | ||
} | ||
} | ||
} | ||
let currentLabels = []; | ||
const molecule = getMolecule$1(sdfPart, labels, currentLabels, options); | ||
if (!molecule) continue; | ||
if (!options.filter || options.filter(molecule)) { | ||
molecules.push(molecule); | ||
// only now we can increase the counter | ||
for (let j = 0; j < currentLabels.length; j++) { | ||
labels[currentLabels[j]].counter++; | ||
} | ||
if (!filter || filter(molecule)) { | ||
molecules.push(molecule); | ||
// only now we can increase the counter | ||
for (let j = 0; j < currentLabels.length; j++) { | ||
labels[currentLabels[j]].counter++; | ||
} | ||
} | ||
} | ||
} | ||
// all numeric fields should be converted to numbers | ||
@@ -202,43 +199,55 @@ for (let label in labels) { | ||
const filterStream = filter__default["default"].bind(null, { objectMode: true }); | ||
/**
 * Asynchronously iterate over the molecules of a SDF stream
 * @param {Readable} readStream Stream containing the SDF, read line by line
 * @param {object} [options={}]
 * @param {Function} [options.filter] Callback allowing to filter the molecules
 * @param {boolean} [options.dynamicTyping=true] Dynamically type the data
 */
// Predicate for the entries stream: keeps a chunk only when it holds more than one character, both as received and once surrounding whitespace is trimmed.
function filterCb(chunk) { | ||
return chunk.length > 1 && chunk.trim().length > 1; | ||
} | ||
async function* iterator(readStream, options = {}) { | ||
const lines = readline.createInterface(readStream); | ||
const currentLines = []; | ||
options = { ...options }; | ||
if (options.dynamicTyping === undefined) options.dynamicTyping = true; | ||
function entries() { | ||
return pipeline__default["default"].obj( | ||
split2__default["default"](/\r?\n\${4}.*\r?\n/), | ||
filterStream(filterCb), | ||
through2__default["default"]({ objectMode: true }, function process(value, encoding, callback) { | ||
const eol = value.includes('\r\n') ? '\r\n' : '\n'; | ||
this.push(`${value + eol}$$$$${eol}`); | ||
callback(); | ||
}), | ||
); | ||
options.eol = '\n'; | ||
for await (let line of lines) { | ||
if (line.startsWith('$$$$')) { | ||
const molecule = getMolecule(currentLines.join(options.eol), options); | ||
if (!options.filter || options.filter(molecule)) { | ||
yield molecule; | ||
} | ||
currentLines.length = 0; | ||
} else { | ||
currentLines.push(line); | ||
} | ||
} | ||
} | ||
function molecules(options) { | ||
return pipeline__default["default"].obj( | ||
entries(), | ||
through2__default["default"]({ objectMode: true }, function process(value, encoding, callback) { | ||
try { | ||
const parsed = parse(value, options); | ||
if (parsed.molecules.length === 1) { | ||
if (options && options.fullResult) { | ||
this.push(parsed); | ||
} else { | ||
this.push(parsed.molecules[0]); | ||
} | ||
} | ||
callback(); | ||
} catch (e) { | ||
callback(e); | ||
function getMolecule(sdfPart, options) { | ||
let parts = sdfPart.split(`${options.eol}>`); | ||
if (parts.length === 0 || parts[0].length <= 5) return; | ||
let molecule = {}; | ||
molecule.molfile = parts[0] + options.eol; | ||
for (let j = 1; j < parts.length; j++) { | ||
let lines = parts[j].split(options.eol); | ||
let from = lines[0].indexOf('<'); | ||
let to = lines[0].indexOf('>'); | ||
let label = lines[0].substring(from + 1, to); | ||
for (let k = 1; k < lines.length - 1; k++) { | ||
if (molecule[label]) { | ||
molecule[label] += options.eol + lines[k]; | ||
} else { | ||
molecule[label] = lines[k]; | ||
} | ||
}), | ||
); | ||
} | ||
if (options.dynamicTyping) { | ||
molecule[label] = dynamicTyping.parseString(molecule[label]); | ||
} | ||
} | ||
return molecule; | ||
} | ||
exports.entries = entries; | ||
exports.molecules = molecules; | ||
exports.iterator = iterator; | ||
exports.parse = parse; |
{ | ||
"name": "sdf-parser", | ||
"version": "5.0.2", | ||
"version": "6.0.0", | ||
"description": "SDF parser", | ||
@@ -52,2 +52,3 @@ "main": "lib/index.js", | ||
"eslint-config-cheminfo": "^8.0.2", | ||
"filelist-utils": "^0.6.0", | ||
"jest": "^28.1.3", | ||
@@ -58,8 +59,5 @@ "openchemlib": "^8.0.1", | ||
"dependencies": { | ||
"ensure-string": "^1.2.0", | ||
"pumpify": "^2.0.1", | ||
"split2": "^4.1.0", | ||
"through2": "^4.0.2", | ||
"through2-filter": "^3.0.0" | ||
"dynamic-typing": "^1.0.0", | ||
"ensure-string": "^1.2.0" | ||
} | ||
} |
@@ -60,37 +60,16 @@ # sdf-parser | ||
## Streams | ||
## Iterator | ||
This API is only available on Node.js. | ||
### molecules(options) | ||
Transform an input text stream to a stream of molecule objects. | ||
#### options | ||
- `fullResult`: true to emit the full result of `parse` instead of just the molecules. | ||
- All other options from the `parse` function. | ||
```js | ||
const { stream } = require('sdf-parser'); | ||
fs.createReadStream('test.sdf') | ||
.pipe(stream.molecules()) | ||
.on('data', (molecule) => { | ||
console.log(molecule.molfile); | ||
}); | ||
const { createReadStream } = require('fs');
const { join } = require('path');
const { createGunzip } = require('zlib');
const { iterator } = require('sdf-parser');

const readStream = createReadStream(join(__dirname, 'test.sdf.gz'));
const stream = readStream.pipe(createGunzip());
const results = [];
for await (const entry of iterator(stream)) {
  results.push(entry);
}
``` | ||
### entries() | ||
Transform an input text stream to a stream of sdf entries. | ||
```js | ||
const { stream } = require('sdf-parser'); | ||
fs.createReadStream('test.sdf') | ||
.pipe(stream.entries()) | ||
.on('data', (entry) => { | ||
// sdf entry as a string | ||
}); | ||
``` | ||
## License | ||
@@ -102,5 +81,5 @@ | ||
[npm-url]: https://www.npmjs.com/package/sdf-parser | ||
[travis-image]: https://img.shields.io/travis/cheminfo/sdf-parser/master.svg?style=flat-square | ||
[travis-image]: https://img.shields.io/travis/cheminfo/sdf-parser/main.svg?style=flat-square | ||
[travis-url]: https://travis-ci.org/cheminfo/sdf-parser | ||
[download-image]: https://img.shields.io/npm/dm/sdf-parser.svg?style=flat-square | ||
[download-url]: https://www.npmjs.com/package/sdf-parser |
export * from './parse'; | ||
export * from './stream'; | ||
export * from './iterator'; |
123
src/parse.js
import { ensureString } from 'ensure-string'; | ||
import { getEntriesBoundaries } from './getEntriesBoundaries'; | ||
import { getMolecule } from './util/getMolecule'; | ||
/** | ||
* Parse a SDF file | ||
* @param {string|ArrayBuffer|Uint8Array} sdf SDF file to parse | ||
* @param {any} [options={}] | ||
* @param {array<string>} [options.include] List of fields to include | ||
* @param {array<string>} [options.exclude] List of fields to exclude | ||
* @param {object} [options={}] | ||
* @param {string[]} [options.include] List of fields to include | ||
* @param {string[]} [options.exclude] List of fields to exclude | ||
* @param {Function} [options.filter] Callback allowing to filter the molecules | ||
* @param {boolean} [options.dynamicTyping] Dynamically type the data | ||
* @param {object} [options.modifiers] Object containing callbacks to apply on some specific fields | ||
* @param {boolean} [options.mixedEOL=false] Set to true if you know there is a mixture between \r\n and \n | ||
* @param {string} [options.eol] Specify the end of line character. Default will be the one found in the file | ||
*/ | ||
export function parse(sdf, options = {}) { | ||
const { | ||
include, | ||
exclude, | ||
filter, | ||
modifiers = {}, | ||
forEach = {}, | ||
dynamicTyping = true, | ||
} = options; | ||
options = { ...options }; | ||
if (options.modifiers === undefined) options.modifiers = {}; | ||
if (options.forEach === undefined) options.forEach = {}; | ||
if (options.dynamicTyping === undefined) options.dynamicTyping = true; | ||
@@ -29,17 +28,23 @@ sdf = ensureString(sdf); | ||
let eol = '\n'; | ||
if (options.mixedEOL) { | ||
sdf = sdf.replace(/\r\n/g, '\n'); | ||
sdf = sdf.replace(/\r/g, '\n'); | ||
} else { | ||
// we will find the delimiter in order to be much faster and not use regular expression | ||
let header = sdf.substr(0, 1000); | ||
if (header.indexOf('\r\n') > -1) { | ||
eol = '\r\n'; | ||
} else if (header.indexOf('\r') > -1) { | ||
eol = '\r'; | ||
if (options.eol === undefined) { | ||
options.eol = '\n'; | ||
if (options.mixedEOL) { | ||
sdf = sdf.replace(/\r\n/g, '\n'); | ||
sdf = sdf.replace(/\r/g, '\n'); | ||
} else { | ||
// we will find the delimiter in order to be much faster and not use regular expression | ||
let header = sdf.substr(0, 1000); | ||
if (header.indexOf('\r\n') > -1) { | ||
options.eol = '\r\n'; | ||
} else if (header.indexOf('\r') > -1) { | ||
options.eol = '\r'; | ||
} | ||
} | ||
} | ||
let entriesBoundaries = getEntriesBoundaries(sdf, `${eol}$$$$`, eol); | ||
let entriesBoundaries = getEntriesBoundaries( | ||
sdf, | ||
`${options.eol}$$$$`, | ||
options.eol, | ||
); | ||
let molecules = []; | ||
@@ -52,68 +57,14 @@ let labels = {}; | ||
let sdfPart = sdf.substring(...entriesBoundaries[i]); | ||
let parts = sdfPart.split(`${eol}>`); | ||
if (parts.length > 0 && parts[0].length > 5) { | ||
let molecule = {}; | ||
let currentLabels = []; | ||
molecule.molfile = parts[0] + eol; | ||
for (let j = 1; j < parts.length; j++) { | ||
let lines = parts[j].split(eol); | ||
let from = lines[0].indexOf('<'); | ||
let to = lines[0].indexOf('>'); | ||
let label = lines[0].substring(from + 1, to); | ||
currentLabels.push(label); | ||
if (!labels[label]) { | ||
labels[label] = { | ||
counter: 0, | ||
isNumeric: dynamicTyping, | ||
keep: false, | ||
}; | ||
if ( | ||
(!exclude || exclude.indexOf(label) === -1) && | ||
(!include || include.indexOf(label) > -1) | ||
) { | ||
labels[label].keep = true; | ||
if (modifiers[label]) { | ||
labels[label].modifier = modifiers[label]; | ||
} | ||
if (forEach[label]) { | ||
labels[label].forEach = forEach[label]; | ||
} | ||
} | ||
} | ||
if (labels[label].keep) { | ||
for (let k = 1; k < lines.length - 1; k++) { | ||
if (molecule[label]) { | ||
molecule[label] += eol + lines[k]; | ||
} else { | ||
molecule[label] = lines[k]; | ||
} | ||
} | ||
if (labels[label].modifier) { | ||
let modifiedValue = labels[label].modifier(molecule[label]); | ||
if (modifiedValue === undefined || modifiedValue === null) { | ||
delete molecule[label]; | ||
} else { | ||
molecule[label] = modifiedValue; | ||
} | ||
} | ||
if (labels[label].isNumeric) { | ||
if ( | ||
!isFinite(molecule[label]) || | ||
molecule[label].match(/^0[0-9]/) | ||
) { | ||
labels[label].isNumeric = false; | ||
} | ||
} | ||
} | ||
let currentLabels = []; | ||
const molecule = getMolecule(sdfPart, labels, currentLabels, options); | ||
if (!molecule) continue; | ||
if (!options.filter || options.filter(molecule)) { | ||
molecules.push(molecule); | ||
// only now we can increase the counter | ||
for (let j = 0; j < currentLabels.length; j++) { | ||
labels[currentLabels[j]].counter++; | ||
} | ||
if (!filter || filter(molecule)) { | ||
molecules.push(molecule); | ||
// only now we can increase the counter | ||
for (let j = 0; j < currentLabels.length; j++) { | ||
labels[currentLabels[j]].counter++; | ||
} | ||
} | ||
} | ||
} | ||
// all numeric fields should be converted to numbers | ||
@@ -120,0 +71,0 @@ for (let label in labels) { |
Native code
Supply chain risk: Contains native code (e.g., compiled binaries or shared libraries). Including native code can obscure malicious behavior.
Found 1 instance in 1 package
393703
2
22
748
10
84
1
+ Added dynamic-typing@^1.0.0
+ Added dynamic-typing@1.0.1 (transitive)
- Removed pumpify@^2.0.1
- Removed split2@^4.1.0
- Removed through2@^4.0.2
- Removed through2-filter@^3.0.0
- Removed duplexify@4.1.3 (transitive)
- Removed end-of-stream@1.4.4 (transitive)
- Removed inherits@2.0.4 (transitive)
- Removed once@1.4.0 (transitive)
- Removed pump@3.0.2 (transitive)
- Removed pumpify@2.0.1 (transitive)
- Removed readable-stream@3.6.2 (transitive)
- Removed safe-buffer@5.2.1 (transitive)
- Removed split2@4.2.0 (transitive)
- Removed stream-shift@1.0.3 (transitive)
- Removed string_decoder@1.3.0 (transitive)
- Removed through2@4.0.2 (transitive)
- Removed through2-filter@3.1.0 (transitive)
- Removed util-deprecate@1.0.2 (transitive)
- Removed wrappy@1.0.2 (transitive)