Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

bionode-vcf

Package Overview
Dependencies
Maintainers
2
Versions
3
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

bionode-vcf - npm Package Compare versions

Comparing version 1.1.5 to 1.3.0

CODE_OF_CONDUCT.md

263

lib/index.js

@@ -9,148 +9,165 @@ /*

var fs = require('fs')
var readline = require('readline')
var Stream = require('stream')
var events = require('events')
var zlib = require('zlib')
var unzip = require('unzip-stream')
var fs = require('fs');
var readline = require('readline');
var Stream = require('stream');
var events = require('events');
var vcf = new events.EventEmitter()
var vcf = new events.EventEmitter();
function parseStream (instream, extension) {
var rl
var numSamples = 0
var sampleIndex = {}
var vcfAttrib = {}
var outstream = new Stream()
vcf.read = function (path) {
switch (extension) {
case 'gz':
rl = readline.createInterface({
input: instream.pipe(zlib.createGunzip())
})
break
case 'zip':
rl = readline.createInterface({
input: instream.pipe(unzip.Parse())
})
break
case 'vcf':
rl = readline.createInterface(instream, outstream)
break
default:
var err = new Error('File format not supported')
vcf.emit('error', err)
}
var instream = fs.createReadStream(path);
var outstream = new Stream();
var rl = readline.createInterface(instream, outstream);
rl.on('line', function (line) {
// check if line starts with hash and use them
if (line.indexOf('#') === 0) {
// ##fileformat=VCFv4.1
if (!vcfAttrib.vcf_v) {
vcfAttrib.vcf_v = line.match(/^##fileformat=/) ? line.split('=')[1] : null
}
var numSamples = 0;
var sampleIndex = {};
var vcfAttrib = {};
// ##samtoolsVersion=0.1.19-44428cd
if (!vcfAttrib.samtools) {
vcfAttrib.samtools = line.match(/^##samtoolsVersion=/) ? line.split('=')[1] : null
}
// ##reference=file://../index/Chalara_fraxinea_TGAC_s1v1_scaffolds.fa
if (!vcfAttrib.refseq) {
vcfAttrib.refseq = line.match((/^##reference=file:/)) ? line.split('=')[1] : null
}
rl.on('line', function(line) {
// #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample1\tsample2\tsample3
// set number of samples in vcf file
if (line.match(/^#CHROM/)) {
var sampleinfo = line.split('\t')
numSamples = sampleinfo.length - 9
// check if line starts with hash and use them
if (line.indexOf('#') == 0) {
for (var i = 0; i < numSamples; i++) {
sampleIndex[i] = sampleinfo[9 + i]
}
}
} else { // go through remaining lines
// split line by tab character
var info = line.split('\t')
//##fileformat=VCFv4.1
if (!vcfAttrib.vcf_v) {
vcfAttrib.vcf_v = line.match(/^##fileformat=/) ? line.split('=')[1] : null ;
}
if (info.length < 9) {
var err = new Error('number of columns in the file are less than expected in vcf')
vcf.emit('error', err)
}
//##samtoolsVersion=0.1.19-44428cd
if (!vcfAttrib.samtools) {
vcfAttrib.samtools = line.match(/^##samtoolsVersion=/) ? line.split('=')[1] : null ;
}
// format information ids
var formatIds = info[8].split(':')
//##reference=file://../index/Chalara_fraxinea_TGAC_s1v1_scaffolds.fa
if (!vcfAttrib.refseq) {
vcfAttrib.refseq = line.match((/^##reference=file:/)) ? line.split('=')[1] : null ;
}
// parse the sample information
var sampleObject = []
for (var j = 0; j < numSamples; j++) {
var sampleData = {}
sampleData['NAME'] = sampleIndex[j]
var formatParts = info[9].split(':')
for (var k = 0; k < formatParts.length; k++) {
sampleData[formatIds[k]] = formatParts[k]
}
sampleObject.push(sampleData)
}
//#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2 sample3
//set number of samples in vcf file
if (line.match(/^#CHROM/)) {
var sampleinfo = line.split('\t');
numSamples = sampleinfo.length - 9;
// parse the variant call information
var varInfo = info[7].split(';')
var infoObject = {}
for (var i = 0; i < numSamples; i++) {
sampleIndex[i] = sampleinfo[9 + i];
}
}
// check if the variant is INDEL or SNP
// and assign the specific type of variation identified
var type
var typeInfo
if (varInfo[0].match(/^INDEL$/)) {
type = 'INDEL'
varInfo.shift()
if (info[3].length > info[4].length) {
typeInfo = 'deletion'
} else if (info[3].length < info[4].length) {
typeInfo = 'insertion'
} else if (info[3].length === info[4].length) {
typeInfo = 'substitution - multi'
}
// go through remaining lines
else {
// split line by tab character
var info = line.split('\t');
} else {
type = 'SNP'
if (info[3].length === 1) {
typeInfo = 'substitution'
} else if (info[3].length > 1) {
typeInfo = 'substitution - multi'
}
}
infoObject['VAR'] = type
infoObject['VARINFO'] = typeInfo
if (info.length < 9) {
var err = new Error('number of columns in the file are less than expected in vcf');
vcf.emit('error', err);
}
// variant info added to object
for (var l = 0; l < varInfo.length; l++) {
var pair = varInfo[l].split('=')
infoObject[pair[0]] = pair[1]
}
// format information ids
var formatIds = info[8].split(':');
// parse the variant information
var varData = {
chr: info[0],
pos: info[1],
id: info[2],
ref: info[3],
alt: info[4],
qual: info[5],
filter: info[6],
varinfo: infoObject,
sampleinfo: sampleObject,
attributes: vcfAttrib
}
// parse the sample information
var sampleObject = [];
for (var j = 0; j < numSamples; j++) {
var sampleData = {};
sampleData['NAME'] = sampleIndex[j];
var formatParts = info[9].split(':');
for (var k = 0; k < formatParts.length; k++) {
sampleData[formatIds[k]] = formatParts[k];
}
sampleObject.push(sampleData)
}
// console.log('Variant data',varData);
vcf.emit('data', varData)
}
})
// parse the variant call information
var varInfo = info[7].split(';');
var infoObject = {};
rl.on('close', function () {
vcf.emit('end')
})
}
// check if the variant is INDEL or SNP
// and assign the specific type of variation identified
var type;
var typeInfo;
if (varInfo[0].match(/^INDEL$/)) {
type = 'INDEL';
varInfo.shift();
if (info[3].length > info[4].length) {
typeInfo = 'deletion';
}
else if (info[3].length < info[4].length) {
typeInfo = 'insertion';
}
else if (info[3].length == info[4].length) {
typeInfo = 'substitution - multi';
}
}
else {
type = 'SNP';
if (info[3].length == 1) {
typeInfo = 'substitution';
}
else if (info[3].length > 1) {
typeInfo = 'substitution - multi';
}
}
infoObject['VAR'] = type;
infoObject['VARINFO'] = typeInfo;
// To read file in stream and parse it
vcf.read = function (path) {
var instream = fs.createReadStream(path)
var extension = path.split('.').pop()
// variant info added to object
for (var l = 0; l < varInfo.length; l++) {
var pair = varInfo[l].split("=");
infoObject[pair[0]] = pair[1];
}
parseStream(instream, extension)
// parse the variant information
var varData = {
chr: info[0],
pos: info[1],
id: info[2],
ref: info[3],
alt: info[4],
qual: info[5],
filter: info[6],
varinfo: infoObject,
sampleinfo: sampleObject,
attributes: vcfAttrib
};
return this
}
//console.log('Variant data',varData);
vcf.emit('data', varData);
// To parse stream data sent by user
vcf.readStream = function (instream, extension = 'vcf') {
parseStream(instream, extension)
}
return this
}
});
rl.on('close', function () {
vcf.emit('end');
});
return this;
};
module.exports = vcf;
module.exports = vcf
{
"name": "bionode-vcf",
"description": "a vcf parser in javascript",
"version": "1.1.5",
"homepage": "https://github.com/shyamrallapalli/bionode-vcf",
"version": "1.3.0",
"homepage": "https://github.com/bionode/bionode-vcf",
"author": {

@@ -12,6 +12,6 @@ "name": "Shyam Rallapalli",

"type": "git",
"url": "git://github.com/shyamrallapalli/bionode-vcf.git"
"url": "git://github.com/bionode/bionode-vcf.git"
},
"bugs": {
"url": "https://github.com/shyamrallapalli/bionode-vcf/issues"
"url": "https://github.com/bionode/bionode-vcf/issues"
},

@@ -21,3 +21,3 @@ "licenses": [

"type": "MIT",
"url": "https://github.com/shyamrallapalli/bionode-vcf/blob/master/LICENSE"
"url": "https://github.com/bionode/bionode-vcf/blob/master/LICENSE"
}

@@ -27,12 +27,15 @@ ],

"scripts": {
"test": "mocha"
"test": "standard && dependency-check . && mocha"
},
"dependencies": {
"events": "^1.1.0",
"readline": "^1.2.1",
"stream": "0.0.2"
"events": "^1.1.1",
"readline": "^1.3.0",
"stream": "0.0.2",
"unzip-stream": "^0.3.0"
},
"devDependencies": {
"chai": "^3.4.1",
"mocha": "^2.3.4"
"chai": "^4.0.1",
"dependency-check": "^2.8.0",
"mocha": "^3.4.2",
"standard": "^10.0.2"
},

@@ -39,0 +42,0 @@ "keywords": [

@@ -0,20 +1,55 @@

<p align="center">
<a href="http://bionode.io">
<img height="200" width="200" title="bionode" alt="bionode logo" src="https://rawgithub.com/bionode/bionode/master/docs/bionode-logo.min.svg"/>
</a>
<br/>
<a href="http://bionode.io/">bionode.io</a>
</p>
# bionode-vcf
[![NPM version](http://img.shields.io/npm/v/bionode-vcf.svg)](https://www.npmjs.org/package/bionode-vcf)
[![Build Status](https://secure.travis-ci.org/bionode/bionode-vcf.png?branch=master)](http://travis-ci.org/bionode/bionode-vcf)
> a vcf parser in javascript
## Getting Started
[![npm](https://img.shields.io/npm/v/bionode-vcf.svg?style=flat-square)](http://npmjs.org/package/bionode-vcf)
[![Travis](https://img.shields.io/travis/bionode/bionode-vcf.svg?style=flat-square)](https://travis-ci.org/bionode/bionode-vcf)
[![Coveralls](https://img.shields.io/coveralls/bionode/bionode-vcf.svg?style=flat-square)](http://coveralls.io/r/bionode/bionode-vcf)
[![Dependencies](http://img.shields.io/david/bionode/bionode-vcf.svg?style=flat-square)](http://david-dm.org/bionode/bionode-vcf)
[![npm](https://img.shields.io/npm/dt/bionode-vcf.svg?style=flat-square)](https://www.npmjs.com/package/bionode-vcf)
[![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg?style=flat-square)](https://gitter.im/bionode/bionode)
#### installation
Install the module with:
## Install
You need to install the latest Node.js first. Please check [nodejs.org](http://nodejs.org) or do the following:
```bash
npm install bionode-vcf
# Ubuntu
sudo apt-get install npm
# Mac
brew install node
# Both
npm install -g n
n stable
```
#### usage
To use `bionode-vcf` as a command line tool, you can install it globally with `-g`.
```bash
npm install bionode-vcf -g
```
Or, if you want to use it as a JavaScript library, you need to install it in your local project folder inside the `node_modules` directory by doing the same command **without** `-g`.
```bash
npm i bionode-vcf # 'i' can be used as shortcut to 'install'
```
### Usage
#### vcf.read
- `vcf.read` takes params: `path`
- The supported filetypes are `vcf`, `zip` and `gz`.
```javascript

@@ -37,40 +72,37 @@ var vcf = require('bionode-vcf');

## Documentation
VCF format specifications and more information about the fields can be found at
[1000 genomes webpage](http://www.1000genomes.org/wiki/analysis/variant%20call%20format/vcf-variant-call-format-version-41) and
[samtools github page](https://github.com/samtools/hts-specs)
#### vcf.readStream
- `vcf.readStream` takes params: `stream` and `extension`
- The supported extensions are `vcf`, `zip` and `gz`.
## Contributing
```javascript
var vcf = require('bionode-vcf');
var fileStream = s3.getObject({
Bucket: [BUCKETNAME],
Key: [FILENAME]
}).createReadStream(); // or stream data from any other source
All contributions are welcome.
vcf.readStream(fileStream, 'zip'); // default value is `vcf`
vcf.on('data', function(feature){
console.log(feature);
})
## Support
vcf.on('end', function(){
console.log('end of file')
})
If you have any problem or suggestion please open an issue [here](https://github.com/bionode/bionode-vcf/issues).
vcf.on('error', function(err){
console.error('it\'s not a vcf', err)
})
## License
```
The MIT License
## Documentation
Copyright (c) 2015, Shyam Rallapalli and Martin Page
VCF format specifications and more information about the fields can be found at
[1000 genomes webpage](http://www.1000genomes.org/wiki/analysis/variant%20call%20format/vcf-variant-call-format-version-41) and
[samtools github page](https://github.com/samtools/hts-specs)
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
## Contributing
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
We welcome all kinds of contributions at all levels of experience. Please read [CONTRIBUTING.md](CONTRIBUTING.md) to get started!

@@ -0,1 +1,2 @@

/* global describe it */
/*

@@ -10,125 +11,168 @@ * bionode-vcf

// chai is an assertion library
//var chai = require('chai');
// var chai = require('chai');
// @see http://chaijs.com/api/assert/
//var assert = chai.assert;
// var assert = chai.assert;
// register alternative styles
// @see http://chaijs.com/api/bdd/
//chai.expect();
//chai.should();
// chai.expect();
// chai.should();
// requires your main app (specified in index.js)
var VCF = require('../lib/index');
var path = require('path');
var assert = require('assert');
var filePath = path.join(__dirname, 'sample.vcf');
var VCF = require('../lib/index')
var fs = require('fs')
var path = require('path')
var assert = require('assert')
var allFeatures = [];
var allFeatures = []
describe('VCF', function(){
describe('.read', function(){
it('should read without error', function(done){
describe('VCF', function () {
describe('.read .vcf file', function () {
var filePath = path.join(__dirname, 'sample.vcf')
function onFeature(vcf) {
allFeatures.push(vcf);
}
it('should read without error', function (finish) {
function onFeature (vcf) {
allFeatures.push(vcf)
}
VCF.read(filePath).on('data', onFeature).on('end', done);
VCF.read(filePath).on('data', onFeature).once('end', finish)
})
it('should look like a valid output', function () {
assert.notStrictEqual(allFeatures, validOutput)
})
})
});
describe('.read .gz file', function () {
var filePath = path.join(__dirname, 'sample.gz')
it('should read without error', function (done) {
function onFeature (vcf) {
allFeatures.push(vcf)
}
VCF.read(filePath).on('data', onFeature).once('end', done)
})
it('should look like a valid output', function () {
assert.notStrictEqual(allFeatures, validOutput);
assert.notStrictEqual(allFeatures, validOutput)
})
});
});
})
describe('.read .zip file', function () {
var filePath = path.join(__dirname, 'sample.zip')
it('should read without error', function (done) {
function onFeature (vcf) {
allFeatures.push(vcf)
}
VCF.read(filePath).on('data', onFeature).once('end', done)
})
it('should look like a valid output', function () {
assert.notStrictEqual(allFeatures, validOutput)
})
})
describe('.readStream', function () {
var filePath = path.join(__dirname, 'sample.vcf')
var fileStream = fs.createReadStream(filePath)
it('should read without error', function (finish) {
function onFeature (vcf) {
allFeatures.push(vcf)
}
VCF.readStream(fileStream).on('data', onFeature).once('end', finish)
})
it('should look like a valid output', function () {
assert.notStrictEqual(allFeatures, validOutput)
})
})
})
var validOutput = [
{ chr: 'Cf746836_TGAC_s1v1_scaffold_4',
pos: '5607',
id: '.',
ref: 'G',
alt: 'C',
qual: '18.1',
filter: '.',
varinfo:
{ VAR: 'SNP',
VARINFO: 'substitution',
DP: '6',
VDB: '6.560000e-02',
RPB: '1.427508e+00',
AF1:' 0.5',
AC1: '1',
DP4: '3,1,1,1',
MQ: '60',
FQ: '21',
PV4: '1,0.0023,1,1' },
sampleinfo:
[ { NAME: 'foxley_wood1_bwa-mem-sorted.bam',
GT: '0/1',
PL: '48,0,123',
GQ: '51' } ],
attributes:
{ vcfver: 'VCFv4.1',
samtools: '0.1.19-44428cd',
reference: 'file://../index/Chalara_fraxinea_TGAC_s1v1_scaffolds.fa' }
},
{ chr: 'Cf746836_TGAC_s1v1_scaffold_12',
pos: '672',
id: '.',
ref: 'CAAA',
alt: 'CAA',
qual: '128',
filter: '.',
varinfo:
{ VAR: 'INDEL',
VARINFO: 'deletion',
IS: '16,0.727273',
DP: '22',
VDB: '1.896486e-01',
AF1: '1',
AC1: '2',
DP4: '0,0,0,20',
MQ: '60',
FQ: '-94.5' },
sampleinfo:
[ { NAME: 'foxley_wood1_bwa-mem-sorted.bam',
GT: '1/1',
PL: '169,60,0',
GQ: '99' } ],
attributes:
{ vcfver: 'VCFv4.1',
samtools: '0.1.19-44428cd',
reference: 'file://../index/Chalara_fraxinea_TGAC_s1v1_scaffolds.fa' }
},
{ chr: 'Cf746836_TGAC_s1v1_scaffold_12',
pos: '2911',
id: '.',
ref: 'ATA',
alt: 'ATACTCGGTA',
qual: '214',
filter: '.',
varinfo:
{ VAR: 'INDEL',
VARINFO: 'insertion',
IS: '16,0.727273',
DP: '22',
VDB: '3.802706e-02',
AF1: '1',
AC1: '2',
DP4: '0,0,9,8',
MQ: '60',
FQ: '-85.5' },
sampleinfo:
[ { NAME: 'foxley_wood1_bwa-mem-sorted.bam',
GT: '1/1',
PL: '255,51,0',
GQ: '99' } ],
attributes:
{ vcfver: 'VCFv4.1',
samtools: '0.1.19-44428cd',
reference: 'file://../index/Chalara_fraxinea_TGAC_s1v1_scaffolds.fa' }
}
];
{ chr: 'Cf746836_TGAC_s1v1_scaffold_4',
pos: '5607',
id: '.',
ref: 'G',
alt: 'C',
qual: '18.1',
filter: '.',
varinfo:
{ VAR: 'SNP',
VARINFO: 'substitution',
DP: '6',
VDB: '6.560000e-02',
RPB: '1.427508e+00',
AF1: ' 0.5',
AC1: '1',
DP4: '3,1,1,1',
MQ: '60',
FQ: '21',
PV4: '1,0.0023,1,1' },
sampleinfo:
[ { NAME: 'foxley_wood1_bwa-mem-sorted.bam',
GT: '0/1',
PL: '48,0,123',
GQ: '51' } ],
attributes:
{ vcfver: 'VCFv4.1',
samtools: '0.1.19-44428cd',
reference: 'file://../index/Chalara_fraxinea_TGAC_s1v1_scaffolds.fa' }
},
{ chr: 'Cf746836_TGAC_s1v1_scaffold_12',
pos: '672',
id: '.',
ref: 'CAAA',
alt: 'CAA',
qual: '128',
filter: '.',
varinfo:
{ VAR: 'INDEL',
VARINFO: 'deletion',
IS: '16,0.727273',
DP: '22',
VDB: '1.896486e-01',
AF1: '1',
AC1: '2',
DP4: '0,0,0,20',
MQ: '60',
FQ: '-94.5' },
sampleinfo:
[ { NAME: 'foxley_wood1_bwa-mem-sorted.bam',
GT: '1/1',
PL: '169,60,0',
GQ: '99' } ],
attributes:
{ vcfver: 'VCFv4.1',
samtools: '0.1.19-44428cd',
reference: 'file://../index/Chalara_fraxinea_TGAC_s1v1_scaffolds.fa' }
},
{ chr: 'Cf746836_TGAC_s1v1_scaffold_12',
pos: '2911',
id: '.',
ref: 'ATA',
alt: 'ATACTCGGTA',
qual: '214',
filter: '.',
varinfo:
{ VAR: 'INDEL',
VARINFO: 'insertion',
IS: '16,0.727273',
DP: '22',
VDB: '3.802706e-02',
AF1: '1',
AC1: '2',
DP4: '0,0,9,8',
MQ: '60',
FQ: '-85.5' },
sampleinfo:
[ { NAME: 'foxley_wood1_bwa-mem-sorted.bam',
GT: '1/1',
PL: '255,51,0',
GQ: '99' } ],
attributes:
{ vcfver: 'VCFv4.1',
samtools: '0.1.19-44428cd',
reference: 'file://../index/Chalara_fraxinea_TGAC_s1v1_scaffolds.fa' }
}
]

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc