@adntro/raw-dna-dtc-parser
Advanced tools
Comparing version 0.0.12 to 0.1.0
import { ValidationInfo } from './raw.models'; | ||
import { RawFormatNormalizerTransform } from './raw.normalizer'; | ||
export declare function convertLocalDtcFile(filepath: string, outfilepath?: string, gzipOutput?: boolean, rawTransformer?: RawFormatNormalizerTransform): Promise<{ | ||
export declare function convertLocalDtcFile(filepath: string, outfilepath?: string, gzipOutput?: boolean, rawTransformer?: RawFormatNormalizerTransform, highWaterMark?: number): Promise<{ | ||
validation: ValidationInfo; | ||
normalizedLocalFile: string; | ||
}>; |
@@ -5,10 +5,10 @@ "use strict"; | ||
const fs_1 = require("fs"); | ||
const zlib_1 = require("zlib"); | ||
const split2 = require("split2"); | ||
const raw_normalizer_1 = require("./raw.normalizer"); | ||
const stream_1 = require("stream"); | ||
function convertLocalDtcFile(filepath, outfilepath = 'converted.txt', gzipOutput = false, rawTransformer = new raw_normalizer_1.RawFormatNormalizerTransform()) { | ||
const rs = fs_1.createReadStream(filepath, 'utf-8'); | ||
const normalizedLocalFile = `${outfilepath}${gzipOutput && outfilepath.indexOf('.gz') === -1 ? '.gz' : ''}`; | ||
const out = fs_1.createWriteStream(normalizedLocalFile); | ||
const files_1 = require("./utils/files"); | ||
function convertLocalDtcFile(filepath, outfilepath = 'converted.txt', gzipOutput = false, rawTransformer = new raw_normalizer_1.RawFormatNormalizerTransform(), highWaterMark = 256 * 1024) { | ||
const rs = fs_1.createReadStream(filepath, { encoding: 'utf-8', highWaterMark }); | ||
const normalizedLocalFile = `${outfilepath}${false && gzipOutput && outfilepath.indexOf('.gz') === -1 ? '.gz' : ''}`; | ||
//const normalizedLocalFile = `${outfilepath}`; | ||
const out = fs_1.createWriteStream(normalizedLocalFile, { encoding: 'utf-8', highWaterMark }); | ||
const p = new Promise((resolve, reject) => { | ||
@@ -18,3 +18,3 @@ rs | ||
.pipe(rawTransformer) | ||
.pipe(gzipOutput ? zlib_1.createGzip() : new stream_1.PassThrough()) | ||
// .pipe(gzipOutput ? createGzip() : new PassThrough()) | ||
.pipe(out); | ||
@@ -28,5 +28,12 @@ rs.on('error', err => reject(err)); | ||
p.finally(() => rs.close()); | ||
return p; | ||
return p.then(({ validation, normalizedLocalFile }) => { | ||
if (gzipOutput) { | ||
return files_1.gzip(normalizedLocalFile).then(normalizedLocalFileGz => ({ validation, normalizedLocalFile: normalizedLocalFileGz })); | ||
} | ||
else { | ||
return { validation, normalizedLocalFile }; | ||
} | ||
}); | ||
} | ||
exports.convertLocalDtcFile = convertLocalDtcFile; | ||
//# sourceMappingURL=raw.converter.js.map |
@@ -19,2 +19,3 @@ /// <reference types="node" /> | ||
debug: boolean; | ||
checkBuild: boolean; | ||
} | ||
@@ -26,2 +27,3 @@ export declare class RawFormatNormalizerTransform extends Transform { | ||
debug: boolean; | ||
checkBuild: boolean; | ||
warnings: Set<string>; | ||
@@ -28,0 +30,0 @@ lineCount: number; |
@@ -50,2 +50,3 @@ "use strict"; | ||
this.debug = false; | ||
this.checkBuild = false; | ||
this.warnings = new Set(); | ||
@@ -69,2 +70,4 @@ this.lineCount = 0; | ||
this.debug = true; | ||
if ((opts === null || opts === void 0 ? void 0 : opts.checkBuild) === true) | ||
this.checkBuild = true; | ||
this.on(exports.EVENTS.HEADER, header => { | ||
@@ -85,6 +88,6 @@ this.format = guessFormat(header); | ||
//emit header | ||
this.push('#rsid\tchr\tposition\tbase\n'); | ||
this.push('#rsid\tchromosome\tposition\tgenotype\n'); | ||
} | ||
this.lineCount++; | ||
const str = String(chunk); | ||
const str = typeof chunk === 'string' ? chunk : String(chunk); | ||
if (!this.genotypeStarted) { | ||
@@ -102,2 +105,3 @@ if (str[0] === '#' || str.match(/rsid|chromosome|chr/i) !== null) { | ||
const snp = raw_line_parser_1.convertLine2Snp(str); | ||
//const snp: Snp = { chr: '1', position: 1, rsid: 'rskjo', nocall: false, a1: 'A', a2: 'T'}; | ||
if (snp.chr === 'XY') | ||
@@ -133,7 +137,9 @@ throw new Error('Skip pseudoautosomal XY'); | ||
// build 37 38 | ||
const build = genome_build_1.checkBuildForSnp(snp); | ||
if (build === 'b37') | ||
this.snpInfo.b37++; | ||
else if (build === 'b38') | ||
this.snpInfo.b38++; | ||
if (this.checkBuild) { | ||
const build = genome_build_1.checkBuildForSnp(snp); | ||
if (build === 'b37') | ||
this.snpInfo.b37++; | ||
else if (build === 'b38') | ||
this.snpInfo.b38++; | ||
} | ||
} | ||
@@ -161,11 +167,13 @@ catch (e) { | ||
} | ||
if (this.snpInfo.b37 > 0 && this.snpInfo.b38 > 0) { | ||
errors.push(raw_errors_1.ERROR_GENOME_BUILD_MIX); | ||
if (this.checkBuild) { | ||
if (this.snpInfo.b37 > 0 && this.snpInfo.b38 > 0) { | ||
errors.push(raw_errors_1.ERROR_GENOME_BUILD_MIX); | ||
} | ||
else if (this.snpInfo.b37 === 0 && this.snpInfo.b38 === 0) { | ||
errors.push(raw_errors_1.ERROR_GENOME_BUILD_NOT_DETECTED); | ||
} | ||
else if (this.snpInfo.b37 < 25 && this.snpInfo.b38 < 25) { | ||
errors.push(raw_errors_1.ERROR_GENOME_BUILD_NOT_ENOUGH); | ||
} | ||
} | ||
else if (this.snpInfo.b37 === 0 && this.snpInfo.b38 === 0) { | ||
errors.push(raw_errors_1.ERROR_GENOME_BUILD_NOT_DETECTED); | ||
} | ||
else if (this.snpInfo.b37 < 25 && this.snpInfo.b38 < 25) { | ||
errors.push(raw_errors_1.ERROR_GENOME_BUILD_NOT_ENOUGH); | ||
} | ||
if (this.snpInfo.hetRatio > 60 || this.snpInfo.hetRatio < 10) { | ||
@@ -172,0 +180,0 @@ errors.push(raw_errors_1.ERROR_HET_RATIO); |
import { Snp } from '../raw.models'; | ||
export declare function cleanGenotypeLine(line: string): string; | ||
/** | ||
* mapper['X'] = 23 | ||
* 2010-08-Y-3314,0,0,-- | ||
* rs4475691 1 846808 CC | ||
* "rs11240777","1","798959","AG" | ||
* rs199474699 26 15990 C C | ||
*/ | ||
export declare function convertLine2Snp(line: string): Snp; | ||
/** | ||
mapper['X'] = 23 | ||
mapper['Y'] = 24 | ||
@@ -9,2 +16,2 @@ mapper['XY'] = 25 | ||
*/ | ||
export declare function convertLine2Snp(line: string): Snp; | ||
export declare function convertLine2SnpGeneric(line: string): Snp; |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.convertLine2Snp = exports.cleanGenotypeLine = void 0; | ||
exports.convertLine2SnpGeneric = exports.convertLine2Snp = exports.cleanGenotypeLine = void 0; | ||
const snp_utils_1 = require("./snp-utils"); | ||
@@ -10,3 +10,13 @@ function cleanGenotypeLine(line) { | ||
/** | ||
* mapper['X'] = 23 | ||
* 2010-08-Y-3314,0,0,-- | ||
* rs4475691 1 846808 CC | ||
* "rs11240777","1","798959","AG" | ||
* rs199474699 26 15990 C C | ||
*/ | ||
function convertLine2Snp(line) { | ||
// Thin public entry point: passes the raw line unchanged to convertLine2SnpGeneric and returns whatever it returns.
// Any error thrown by convertLine2SnpGeneric propagates to the caller, since nothing here catches it.
return convertLine2SnpGeneric(line); | ||
} | ||
exports.convertLine2Snp = convertLine2Snp; | ||
/** | ||
mapper['X'] = 23 | ||
mapper['Y'] = 24 | ||
@@ -16,3 +26,3 @@ mapper['XY'] = 25 | ||
*/ | ||
function convertLine2Snp(line) { | ||
function convertLine2SnpGeneric(line) { | ||
if (('' + line).length > 60) | ||
@@ -80,3 +90,3 @@ throw new Error('invalid snp line length'); | ||
} | ||
exports.convertLine2Snp = convertLine2Snp; | ||
exports.convertLine2SnpGeneric = convertLine2SnpGeneric; | ||
//# sourceMappingURL=raw-line-parser.js.map |
{ | ||
"name": "@adntro/raw-dna-dtc-parser", | ||
"version": "0.0.12", | ||
"version": "0.1.0", | ||
"description": "DNA file reader (in DTC formats) and parser", | ||
@@ -5,0 +5,0 @@ "author": "Adntro Genetics SL", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
60607
33
1017
2