transcript-parser
Advanced tools
Comparing version 0.6.0 to 0.7.0
@@ -8,3 +8,3 @@ 'use strict'; | ||
const Promise = require('bluebird'); | ||
const readline = require('readline'); | ||
const es = require('event-stream'); | ||
@@ -24,3 +24,3 @@ | ||
regex: { | ||
newLine: /\r?\n/, | ||
newLine: /(?:\r?\n)+/, | ||
action: /\([A-Z\ ]+\)\ ?/, | ||
@@ -48,4 +48,3 @@ speaker: /^((?:\[\d{1,2}:\d{1,2}:\d{1,2}\]\ ?)?[A-Z\d\ \/,.\-\(\)]+?)(?:\ ?\[[A-z\ ]+\])? ?:\ ?/, | ||
lines = _.map(lines, line => { | ||
if(line.length <= 0) return ''; | ||
lines = _.reduce(lines, (out, line) => { | ||
if(this.settings.removeActions) | ||
@@ -57,6 +56,6 @@ line = removeAll(line, this.regex.action); | ||
line = removeAll(line, this.regex.timestamp); | ||
return line; | ||
}); | ||
lines = _.filter(lines, line => (line.length > 0)); //Remove newly blank lines | ||
if(line.length > 0) out.push(line); | ||
return out; | ||
}, []); | ||
@@ -101,3 +100,2 @@ //Output object | ||
proto.parseOne = function(transcript, cb) { | ||
const hasCallback = (typeof cb !== 'undefined' && cb !== null); | ||
//Output object | ||
@@ -110,20 +108,19 @@ const output = {}; | ||
let speaker = 'none'; //Current speaker | ||
let ignore = false, match = null; | ||
return Promise.try(() => { | ||
//Remove blank lines | ||
let lines = _.filter(transcript.split(this.regex.newLine), line => (line.length > 0)); | ||
let ignore = false, match = null; | ||
return Promise | ||
.try(() => { | ||
return Promise.reduce(transcript.split(this.regex.newLine), (out, line) => { | ||
if(this.settings.removeActions) | ||
line = removeAll(line, this.regex.action); | ||
if(this.settings.removeAnnotations) | ||
line = removeAll(line, this.regex.annotation); | ||
else if(this.settings.removeTimestamps) | ||
line = removeAll(line, this.regex.timestamp); | ||
lines = _.map(lines, line => { | ||
if(this.settings.removeActions) | ||
line = removeAll(line, this.regex.action); | ||
if(this.settings.removeAnnotations) | ||
line = removeAll(line, this.regex.annotation); | ||
else if(this.settings.removeTimestamps) | ||
line = removeAll(line, this.regex.timestamp); | ||
return line; | ||
}); | ||
lines = _.filter(lines, line => (line.length > 0)); //Remove newly blank lines | ||
_.each(lines, (line) => { | ||
if(line.length > 0) out.push(line); | ||
return out; | ||
}, []); | ||
}) | ||
.each(line => { | ||
if((match = this.regex.speaker.exec(line)) !== null) { | ||
@@ -147,9 +144,7 @@ //Regex match | ||
output.order.push(speaker); | ||
}); | ||
if(hasCallback) cb(null, output); | ||
return Promise.resolve(output); | ||
}).catch(err => { | ||
if(hasCallback) cb(err); | ||
else return Promise.reject(err); | ||
}); | ||
}) | ||
.then(() => { | ||
return Promise.resolve(output); | ||
}) | ||
.asCallback(cb); | ||
}; | ||
@@ -255,5 +250,2 @@ | ||
proto.parseStream = function(inputStream, cb) { | ||
const lineStream = readline.createInterface({ | ||
input: inputStream | ||
}); | ||
//Output object | ||
@@ -269,27 +261,30 @@ const output = {}; | ||
lineStream.on('line', line => { | ||
if(line === null) return; | ||
line = this.filterLine(line); | ||
if(!line) return; | ||
inputStream | ||
.pipe(es.split(this.regex.newLine)) | ||
.pipe(es.mapSync(line => { | ||
if(line === null) return; | ||
line = this.filterLine(line); | ||
if(!line) return; | ||
if((match = this.regex.speaker.exec(line)) !== null) { | ||
speaker = match[1].trim(); //Regex match - is speaker | ||
//Remove the speaker from the line | ||
line = line.replace(this.regex.speaker, ''); | ||
ignore = (this.settings.blacklist.indexOf(speaker) > -1); | ||
} | ||
if((match = this.regex.speaker.exec(line)) !== null) { | ||
speaker = match[1].trim(); //Regex match - is speaker | ||
//Remove the speaker from the line | ||
line = line.replace(this.regex.speaker, ''); | ||
ignore = (this.settings.blacklist.indexOf(speaker) > -1); | ||
} | ||
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return; | ||
//If the speaker's key doesn't already exist | ||
if(!(speaker in output.speaker)) { | ||
//Set the output's speaker key to a new empty array | ||
output.speaker[speaker] = []; | ||
} | ||
//If the speaker is defined or the setting to remove undefined speakers is false | ||
//Add the text to the output speaker's key and speaker name to the order array | ||
output.speaker[speaker].push(line); | ||
output.order.push(speaker); | ||
}).on('close', () => { | ||
return cb(null, output); | ||
}); | ||
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return; | ||
//If the speaker's key doesn't already exist | ||
if(!(speaker in output.speaker)) { | ||
//Set the output's speaker key to a new empty array | ||
output.speaker[speaker] = []; | ||
} | ||
//If the speaker is defined or the setting to remove undefined speakers is false | ||
//Add the text to the output speaker's key and speaker name to the order array | ||
output.speaker[speaker].push(line); | ||
output.order.push(speaker); | ||
})) | ||
.on('close', () => { | ||
return cb(null, output); | ||
}); | ||
@@ -296,0 +291,0 @@ }; |
{ | ||
"name": "transcript-parser", | ||
"version": "0.6.0", | ||
"version": "0.7.0", | ||
"description": "Parses plaintext speech/debate/radio transcripts into JavaScript objects.", | ||
@@ -39,4 +39,5 @@ "main": "app.js", | ||
"bluebird": "^3.3.4", | ||
"event-stream": "^3.3.4", | ||
"lodash": "^4.9.0" | ||
} | ||
} |
@@ -5,2 +5,3 @@ transcript-parser | ||
[![Coverage Status](https://coveralls.io/repos/github/willshiao/transcript-parser/badge.svg?branch=master)](https://coveralls.io/github/willshiao/transcript-parser?branch=master) | ||
[![npm](https://img.shields.io/npm/v/transcript-parser.svg?maxAge=2592000)](https://www.npmjs.com/package/transcript-parser) | ||
@@ -11,4 +12,6 @@ ## Description | ||
Tested for Node.js versions >= 4.4.6 | ||
Tests can be run with `npm test` and a benchmark can be run with `npm run benchmark`. For a full coverage report using [Istanbul](https://github.com/gotwarlost/istanbul), run `npm run travis-test`. | ||
Tested for Node.js >= v4.4.6 | ||
## Usage | ||
@@ -18,27 +21,30 @@ | ||
const fs = require('fs'); | ||
const TranscriptParser = require('transcript-parser'); | ||
const tp = new TranscriptParser(); | ||
//Synchronous example | ||
const parsed = tp.parseOneSync(fs.readFileSync('transcript.txt', {encoding: 'UTF-8'})); | ||
```node | ||
'use strict'; | ||
const fs = require('fs'); | ||
const TranscriptParser = require('transcript-parser'); | ||
const tp = new TranscriptParser(); | ||
//Synchronous example | ||
const parsed = tp.parseOneSync(fs.readFileSync('transcript.txt', 'utf8')); | ||
console.log(parsed); | ||
//Asynchronous example | ||
fs.readFile('transcript.txt', (err, data) => { | ||
if(err) return console.error('Error:', err); | ||
tp.parseOne(data, (err, parsed) => { | ||
if(err) return console.error('Error:', err); | ||
console.log(parsed); | ||
//Asynchronous example | ||
fs.readFile('transcript.txt', (err, data) => { | ||
if(err) return console.error('Error:', err); | ||
tp.parseOne(data, (err, parsed => { | ||
if(err) return console.error('Error:', err); | ||
console.log(parsed); | ||
})); | ||
}); | ||
//Stream example | ||
const stream = fs.createReadStream('transcript.txt', 'utf8'); | ||
tp.parseStream(stream, (err, parsed) => { | ||
if(err) return console.error('Error:', err); | ||
console.log(parsed); | ||
}); | ||
})); | ||
}); | ||
//Stream example | ||
const stream = fs.createReadStream('transcript.txt', 'utf8'); | ||
tp.parseStream(stream, (err, parsed) => { | ||
if(err) return console.error('Error:', err); | ||
console.log(parsed); | ||
}); | ||
``` | ||
## Config | ||
@@ -156,2 +162,1 @@ | ||
+ A callback to be executed on function completion or error. | ||
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
159
0
17836
3
271
+ Added event-stream@^3.3.4
+ Added duplexer@0.1.2 (transitive)
+ Added event-stream@3.3.5 (transitive)
+ Added from@0.1.7 (transitive)
+ Added map-stream@0.0.7 (transitive)
+ Added pause-stream@0.0.11 (transitive)
+ Added split@1.0.1 (transitive)
+ Added stream-combiner@0.2.2 (transitive)
+ Added through@2.3.8 (transitive)