Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

transcript-parser

Package Overview
Dependencies
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

transcript-parser - npm Package Compare versions

Comparing version 0.6.0 to 0.7.0

109

lib/parser.js

@@ -8,3 +8,3 @@ 'use strict';

const Promise = require('bluebird');
const readline = require('readline');
const es = require('event-stream');

@@ -24,3 +24,3 @@

regex: {
newLine: /\r?\n/,
newLine: /(?:\r?\n)+/,
action: /\([A-Z\ ]+\)\ ?/,

@@ -48,4 +48,3 @@ speaker: /^((?:\[\d{1,2}:\d{1,2}:\d{1,2}\]\ ?)?[A-Z\d\ \/,.\-\(\)]+?)(?:\ ?\[[A-z\ ]+\])? ?:\ ?/,

lines = _.map(lines, line => {
if(line.length <= 0) return '';
lines = _.reduce(lines, (out, line) => {
if(this.settings.removeActions)

@@ -57,6 +56,6 @@ line = removeAll(line, this.regex.action);

line = removeAll(line, this.regex.timestamp);
return line;
});
lines = _.filter(lines, line => (line.length > 0)); //Remove newly blank lines
if(line.length > 0) out.push(line);
return out;
}, []);

@@ -101,3 +100,2 @@ //Output object

proto.parseOne = function(transcript, cb) {
const hasCallback = (typeof cb !== 'undefined' && cb !== null);
//Output object

@@ -110,20 +108,19 @@ const output = {};

let speaker = 'none'; //Current speaker
let ignore = false, match = null;
return Promise.try(() => {
//Remove blank lines
let lines = _.filter(transcript.split(this.regex.newLine), line => (line.length > 0));
let ignore = false, match = null;
return Promise
.try(() => {
return Promise.reduce(transcript.split(this.regex.newLine), (out, line) => {
if(this.settings.removeActions)
line = removeAll(line, this.regex.action);
if(this.settings.removeAnnotations)
line = removeAll(line, this.regex.annotation);
else if(this.settings.removeTimestamps)
line = removeAll(line, this.regex.timestamp);
lines = _.map(lines, line => {
if(this.settings.removeActions)
line = removeAll(line, this.regex.action);
if(this.settings.removeAnnotations)
line = removeAll(line, this.regex.annotation);
else if(this.settings.removeTimestamps)
line = removeAll(line, this.regex.timestamp);
return line;
});
lines = _.filter(lines, line => (line.length > 0)); //Remove newly blank lines
_.each(lines, (line) => {
if(line.length > 0) out.push(line);
return out;
}, []);
})
.each(line => {
if((match = this.regex.speaker.exec(line)) !== null) {

@@ -147,9 +144,7 @@ //Regex match

output.order.push(speaker);
});
if(hasCallback) cb(null, output);
return Promise.resolve(output);
}).catch(err => {
if(hasCallback) cb(err);
else return Promise.reject(err);
});
})
.then(() => {
return Promise.resolve(output);
})
.asCallback(cb);
};

@@ -255,5 +250,2 @@

proto.parseStream = function(inputStream, cb) {
const lineStream = readline.createInterface({
input: inputStream
});
//Output object

@@ -269,27 +261,30 @@ const output = {};

lineStream.on('line', line => {
if(line === null) return;
line = this.filterLine(line);
if(!line) return;
inputStream
.pipe(es.split(this.regex.newLine))
.pipe(es.mapSync(line => {
if(line === null) return;
line = this.filterLine(line);
if(!line) return;
if((match = this.regex.speaker.exec(line)) !== null) {
speaker = match[1].trim(); //Regex match - is speaker
//Remove the speaker from the line
line = line.replace(this.regex.speaker, '');
ignore = (this.settings.blacklist.indexOf(speaker) > -1);
}
if((match = this.regex.speaker.exec(line)) !== null) {
speaker = match[1].trim(); //Regex match - is speaker
//Remove the speaker from the line
line = line.replace(this.regex.speaker, '');
ignore = (this.settings.blacklist.indexOf(speaker) > -1);
}
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return;
//If the speaker's key doesn't already exist
if(!(speaker in output.speaker)) {
//Set the output's speaker key to a new empty array
output.speaker[speaker] = [];
}
//If the speaker is defined or the setting to remove undefined speakers is false
//Add the text to the output speaker's key and speaker name to the order array
output.speaker[speaker].push(line);
output.order.push(speaker);
}).on('close', () => {
return cb(null, output);
});
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return;
//If the speaker's key doesn't already exist
if(!(speaker in output.speaker)) {
//Set the output's speaker key to a new empty array
output.speaker[speaker] = [];
}
//If the speaker is defined or the setting to remove undefined speakers is false
//Add the text to the output speaker's key and speaker name to the order array
output.speaker[speaker].push(line);
output.order.push(speaker);
}))
.on('close', () => {
return cb(null, output);
});

@@ -296,0 +291,0 @@ };

{
"name": "transcript-parser",
"version": "0.6.0",
"version": "0.7.0",
"description": "Parses plaintext speech/debate/radio transcripts into JavaScript objects.",

@@ -39,4 +39,5 @@ "main": "app.js",

"bluebird": "^3.3.4",
"event-stream": "^3.3.4",
"lodash": "^4.9.0"
}
}

@@ -5,2 +5,3 @@ transcript-parser

[![Coverage Status](https://coveralls.io/repos/github/willshiao/transcript-parser/badge.svg?branch=master)](https://coveralls.io/github/willshiao/transcript-parser?branch=master)
[![npm](https://img.shields.io/npm/v/transcript-parser.svg?maxAge=2592000)](https://www.npmjs.com/package/transcript-parser)

@@ -11,4 +12,6 @@ ## Description

Tested for Node.js versions >= 4.4.6
Tests can be run with `npm test` and a benchmark can be run with `npm run benchmark`. For a full coverage report using [Istanbul](https://github.com/gotwarlost/istanbul), run `npm run travis-test`.
Tested for Node.js >= v4.4.6
## Usage

@@ -18,27 +21,30 @@

const fs = require('fs');
const TranscriptParser = require('transcript-parser');
const tp = new TranscriptParser();
//Synchronous example
const parsed = tp.parseOneSync(fs.readFileSync('transcript.txt', {encoding: 'UTF-8'}));
```node
'use strict';
const fs = require('fs');
const TranscriptParser = require('transcript-parser');
const tp = new TranscriptParser();
//Synchronous example
const parsed = tp.parseOneSync(fs.readFileSync('transcript.txt', 'utf8'));
console.log(parsed);
//Asynchronous example
fs.readFile('transcript.txt', 'utf8', (err, data) => {
if(err) return console.error('Error:', err);
tp.parseOne(data, (err, parsed) => {
if(err) return console.error('Error:', err);
console.log(parsed);
//Asynchronous example
fs.readFile('transcript.txt', 'utf8', (err, data) => {
if(err) return console.error('Error:', err);
tp.parseOne(data, (err, parsed) => {
if(err) return console.error('Error:', err);
console.log(parsed);
});
});
//Stream example
const stream = fs.createReadStream('transcript.txt', 'utf8');
tp.parseStream(stream, (err, parsed) => {
if(err) return console.error('Error:', err);
console.log(parsed);
});
}));
});
//Stream example
const stream = fs.createReadStream('transcript.txt', 'utf8');
tp.parseStream(stream, (err, parsed) => {
if(err) return console.error('Error:', err);
console.log(parsed);
});
```
## Config

@@ -156,2 +162,1 @@

+ A callback to be executed on function completion or error.
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc