Socket
Socket
Sign inDemoInstall

transcript-parser

Package Overview
Dependencies
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

transcript-parser - npm Package Compare versions

Comparing version 0.2.0 to 0.3.0

benchmark/benchmark.js

227

app.js

@@ -1,226 +0,3 @@

"use strict";
/***********************
* Module dependencies
***********************/
// const S = require('string');
const _ = require('lodash');
const Promise = require('bluebird');
'use strict';
/***********************
* Object creation
***********************/
/**
 * Creates a transcript parser.
 *
 * @constructor
 * @param {Object} [options] - Setting overrides, merged over the defaults.
 * @param {boolean} [options.removeActions=true] - Strip parenthesised
 *   upper-case actions such as "(APPLAUSE)".
 * @param {boolean} [options.removeAnnotations=true] - Strip every bracketed
 *   annotation such as "[laughter]" (this also removes timestamps).
 * @param {boolean} [options.removeTimestamps=true] - Strip "[hh:mm:ss]"
 *   timestamps; only consulted when removeAnnotations is false.
 * @param {boolean} [options.removeUnknownSpeakers=false] - Drop lines that
 *   appear before any speaker label instead of filing them under 'none'.
 * @param {Object} [options.aliases={}] - Map of canonical speaker name to a
 *   collection of regular expressions matching that speaker's aliases.
 */
const TranscriptParser = function (options) {
  this.defaultSettings = {
    removeActions: true,
    removeAnnotations: true,
    removeTimestamps: true, // Overridden by removeAnnotations
    removeUnknownSpeakers: false,
    aliases: {}
  };
  // Merge into a fresh object: assigning straight onto defaultSettings would
  // overwrite the defaults with the caller's options.
  this.settings = Object.assign({}, this.defaultSettings, options || {});
  this.regex = {
    newLine: /\r?\n/,
    action: /\([A-Z\ ]+\)\ ?/,
    speaker: /^((?:\[\d{1,2}:\d{1,2}:\d{1,2}\]\ ?)?[A-Z\d\ \/,.\-\(\)]+)(?: \[.+\])?:\ ?/,
    timestamp: /\[\d{1,2}:\d{1,2}:\d{1,2}\]\ ?/,
    annotation: /\[.+?\]\ ?/
  };
};
// Shorthand for the prototype; the parser's methods below are attached to it.
const proto = TranscriptParser.prototype;
// Captures the module-level `this`. NOTE(review): not referenced anywhere in
// the visible code — confirm it is unused before removing.
const tp = this;
/***********************
* Synchronous parseOne method
***********************/
/**
 * Parses one transcript synchronously.
 *
 * Blank lines are discarded, actions/annotations/timestamps are stripped
 * according to the settings, and every remaining line is attributed to the
 * most recently seen speaker label ('none' until a label is found).
 *
 * @param {string} transcript - Raw transcript text.
 * @returns {{speaker: Object, order: string[]}} `speaker` maps each speaker
 *   name to that speaker's lines; `order` lists the speaker of every kept
 *   line, in sequence.
 */
proto.parseOneSync = function(transcript) {
  const settings = this.settings;
  const regex = this.regex;
  // Builds a mapper that deletes every match of `pattern` from a line.
  const strip = pattern => text => text.split(pattern).join('');

  // Split into lines and drop the blank ones.
  let rows = transcript.split(regex.newLine).filter(row => row.length > 0);

  if (settings.removeActions) {
    rows = rows.map(strip(regex.action));
  }
  if (settings.removeAnnotations) {
    // Annotations include timestamps, so this branch covers both.
    rows = rows.map(strip(regex.annotation));
  } else if (settings.removeTimestamps) {
    rows = rows.map(strip(regex.timestamp));
  }

  // `speaker` holds the lines per speaker, `order` the speaker sequence.
  const result = { speaker: {}, order: [] };
  let current = 'none';

  rows.forEach(row => {
    let text = row;
    const label = regex.speaker.exec(text);
    if (label) {
      // A new speaker label: remember it and cut it off the line.
      current = label[1];
      text = text.replace(regex.speaker, '');
    }
    // Lines without a known speaker are skipped when so configured.
    if (current === 'none' && settings.removeUnknownSpeakers) {
      return;
    }
    if (!(current in result.speaker)) {
      result.speaker[current] = [];
    }
    result.speaker[current].push(text);
    result.order.push(current);
  });

  return result;
};
/***********************
* Asynchronous parseOne method
***********************/
/**
 * Parses one transcript asynchronously.
 *
 * Produces the same output shape as parseOneSync. Every failure, including
 * an invalid `transcript` such as null, is reported through the callback or
 * the returned promise rather than thrown synchronously.
 *
 * @param {string} transcript - Raw transcript text.
 * @param {function(Error, Object)} [cb] - Optional Node-style callback. When
 *   omitted, use the returned promise instead.
 * @returns {Promise} Bluebird promise for `{speaker, order}`: `speaker` maps
 *   each speaker name to that speaker's lines, `order` lists the speaker of
 *   every kept line in sequence.
 */
proto.parseOne = function(transcript, cb) {
  // `speaker` holds the lines per speaker, `order` the speaker sequence.
  const output = { speaker: {}, order: [] };
  // Current speaker; lines seen before any label belong to 'none'.
  let speaker = 'none';
  // Builds a mapper that deletes every match of `pattern` from a line.
  const strip = pattern => line => line.split(pattern).join('');

  // Promise.try turns a synchronous throw (e.g. transcript is null) into a
  // rejection so it reaches the callback / promise consumer.
  return Promise.try(() => {
    // Remove blank lines
    return Promise.filter(transcript.split(this.regex.newLine), line => line.length > 0);
  }).then(lines => {
    if (this.settings.removeActions) {
      return Promise.map(lines, strip(this.regex.action));
    }
    return lines;
  }).then(lines => {
    if (this.settings.removeAnnotations) {
      // Annotations include timestamps, so this branch covers both.
      return Promise.map(lines, strip(this.regex.annotation));
    } else if (this.settings.removeTimestamps) {
      return Promise.map(lines, strip(this.regex.timestamp));
    }
    return lines;
  }).then(lines => {
    return Promise.each(lines, line => {
      const label = this.regex.speaker.exec(line);
      if (label) {
        // A new speaker label: remember it and cut it off the line.
        speaker = label[1];
        line = line.replace(this.regex.speaker, '');
      }
      // Lines without a known speaker are skipped when so configured.
      if (speaker === 'none' && this.settings.removeUnknownSpeakers) {
        return;
      }
      if (!(speaker in output.speaker)) {
        output.speaker[speaker] = [];
      }
      output.speaker[speaker].push(line);
      output.order.push(speaker);
    });
  }).then(() => output)
    // asCallback invokes cb exactly once (and is a no-op when cb is omitted),
    // so an exception thrown inside cb can no longer trigger a second call.
    .asCallback(cb);
};
/***********************
* Synchronous resolveAliases method
***********************/
/**
 * Replaces aliased speaker names with their canonical names, synchronously.
 *
 * `data` is modified in place and returned. When no aliases are configured
 * it is returned untouched. When several canonical names match a speaker,
 * the first one (in `aliases` key order) wins, both for the `speaker` map
 * and for the `order` list.
 *
 * @param {{speaker: Object, order: string[]}} data - Output of parseOne /
 *   parseOneSync.
 * @returns {{speaker: Object, order: string[]}} The same `data` object.
 */
proto.resolveAliasesSync = function(data) {
  const aliases = this.settings.aliases;
  if (_.isEmpty(aliases)) return data;

  // Returns the first canonical name with a pattern matching `name`, or
  // `name` itself when nothing matches. String#search ignores the regex's
  // `g` flag / lastIndex, so reused global patterns cannot skip matches.
  const canonicalNameFor = name => {
    const match = Object.keys(aliases).find(trueName => {
      const patterns = aliases[trueName];
      return Object.keys(patterns).some(key => name.search(patterns[key]) !== -1);
    });
    return match === undefined ? name : match;
  };

  const speakers = data.speaker;
  // Iterate over a snapshot of the keys: entries are added and deleted below.
  Object.keys(speakers).forEach(name => {
    const trueName = canonicalNameFor(name);
    // Unaliased, or already stored under its canonical name: moving it would
    // duplicate its lines and then delete them.
    if (trueName === name) return;
    // Append to the canonical speaker's lines, or start the list with these.
    speakers[trueName] = Array.isArray(speakers[trueName]) ?
      speakers[trueName].concat(speakers[name]) :
      speakers[name];
    // Delete the old key
    delete speakers[name];
  });

  // Fix the names in the order array
  data.order = data.order.map(name => canonicalNameFor(name));
  return data;
};
/***********************
* Asynchronous resolveAliases method
***********************/
/**
 * Replaces aliased speaker names with their canonical names, asynchronously.
 *
 * `data` is modified in place. When no aliases are configured it is passed
 * through untouched. When several canonical names match a speaker, the first
 * one (in `aliases` key order) wins, both for the `speaker` map and for the
 * `order` list. Every failure, including invalid `data` such as null, is
 * reported through the callback or the returned promise rather than thrown
 * synchronously.
 *
 * @param {{speaker: Object, order: string[]}} data - Output of parseOne /
 *   parseOneSync.
 * @param {function(Error, Object)} [cb] - Optional Node-style callback. When
 *   omitted, use the returned promise instead.
 * @returns {Promise} Bluebird promise for the same `data` object.
 */
proto.resolveAliases = function(data, cb) {
  // Promise.try turns a synchronous throw (e.g. data is null) into a
  // rejection so it reaches the callback / promise consumer.
  return Promise.try(() => {
    const aliases = this.settings.aliases;
    if (_.isEmpty(aliases)) return data;

    // Returns the first canonical name with a pattern matching `name`, or
    // `name` itself when nothing matches. String#search ignores the regex's
    // `g` flag / lastIndex, so reused global patterns cannot skip matches.
    const canonicalNameFor = name => {
      const match = Object.keys(aliases).find(trueName => {
        const patterns = aliases[trueName];
        return Object.keys(patterns).some(key => name.search(patterns[key]) !== -1);
      });
      return match === undefined ? name : match;
    };

    const speakers = data.speaker;
    // Iterate over a snapshot of the keys: entries are added and deleted below.
    Object.keys(speakers).forEach(name => {
      const trueName = canonicalNameFor(name);
      // Unaliased, or already stored under its canonical name: moving it
      // would duplicate its lines and then delete them.
      if (trueName === name) return;
      // Append to the canonical speaker's lines, or start the list with these.
      speakers[trueName] = Array.isArray(speakers[trueName]) ?
        speakers[trueName].concat(speakers[name]) :
        speakers[name];
      // Delete the old key
      delete speakers[name];
    });

    // Fix the names in the order array
    data.order = data.order.map(name => canonicalNameFor(name));
    return data;
  })
    // asCallback invokes cb exactly once (and is a no-op when cb is omitted),
    // so an exception thrown inside cb can no longer trigger a second call.
    .asCallback(cb);
};
module.exports = TranscriptParser;
module.exports = require('./lib/parser');
{
"name": "transcript-parser",
"version": "0.2.0",
"version": "0.3.0",
"description": "Parses plaintext speech/debate/radio transcripts into JavaScript objects.",

@@ -8,3 +8,4 @@ "main": "app.js",

"test": "node ./node_modules/mocha/bin/mocha",
"travis-test": "node ./node_modules/istanbul/lib/cli.js cover node_modules/mocha/bin/_mocha"
"travis-test": "node ./node_modules/istanbul/lib/cli.js cover node_modules/mocha/bin/_mocha",
"benchmark": "node ./benchmark/benchmark.js"
},

@@ -11,0 +12,0 @@ "repository": {

@@ -1,2 +0,6 @@

"use strict";
'use strict';
/***********************
* Test dependencies
***********************/
const Promise = require('bluebird');

@@ -7,3 +11,3 @@ const path = require('path');

const chai = require('chai');
chai.should();
const should = chai.should();

@@ -13,3 +17,5 @@ const TEST_DIR = path.join(__dirname, 'transcripts');

/***********************
* Tests
***********************/
describe('TranscriptParser', function() {

@@ -112,12 +118,12 @@

const parser = new TranscriptParser({removeActions: false});
var result = parser.parseOne('PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)',
function(err, result) {
if(err) return done(err);
result.speaker.should.eql({
'PERSON A': [
'Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)'
]
parser.parseOne('PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)',
(err, result) => {
if(err) return done(err);
result.speaker.should.eql({
'PERSON A': [
'Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)'
]
});
done();
});
done();
});
});

@@ -127,11 +133,11 @@

const parser = new TranscriptParser({removeAnnotations: false, removeTimestamps: false});
var result = parser.parseOne('[20:20:34] BERMAN: [2:1:41] The...',
function(err, result) {
if(err) return done(err);
result.speaker.should.eql({
'[20:20:34] BERMAN': [
'[2:1:41] The...'
]
});
done();
parser.parseOne('[20:20:34] BERMAN: [2:1:41] The...',
(err, result) => {
if(err) return done(err);
result.speaker.should.eql({
'[20:20:34] BERMAN': [
'[2:1:41] The...'
]
});
done();
});

@@ -142,11 +148,9 @@ });

const parser = new TranscriptParser({removeAnnotations: false, removeTimestamps: true});
var result = parser.parseOne('[20:20:34] BERMAN: [2:1:41] The [first] name...',
function(err, result) {
if(err) return done(err);
result.speaker.should.eql({
'BERMAN': [
'The [first] name...'
]
});
done();
parser.parseOne('[20:20:34] BERMAN: [2:1:41] The [first] name...',
(err, result) => {
if(err) return done(err);
result.speaker.should.eql({
'BERMAN': ['The [first] name...']
});
done();
});

@@ -157,10 +161,10 @@ });

const parser = new TranscriptParser({removeUnknownSpeakers: true});
var result = parser.parseOne('The quick [brown] fox jumps over the (lazy) dog.',
function(err, result) {
if(err) return done(err);
result.should.eql({
speaker: {},
order: []
});
done();
parser.parseOne('The quick [brown] fox jumps over the (lazy) dog.',
(err, result) => {
if(err) return done(err);
result.should.eql({
speaker: {},
order: []
});
done();
});

@@ -187,2 +191,32 @@ });

it('should return a promise when callback is not set', function(done) {
readSample(1)
.bind({})
.then(info => {
return tp.parseOne(info);
})
.then(result => {
this.result = result;
return readExpected(1);
}).then(expected => {
this.result.should.be.eql(JSON.parse(expected));
done();
})
.catch(e => done(e));
});
it('should handle errors properly', function(done) {
tp.parseOne(null).then( output => {
should.not.exist(output);
}).catch(err => {
should.exist(err);
}).finally(() => {
tp.parseOne(null, function(err, output) {
should.exist(err);
should.not.exist(output);
done();
});
});
});
});

@@ -252,2 +286,22 @@

it('should return a promise when callback is not set', function(done) {
const tp = new TranscriptParser({
aliases: { "DONALD TRUMP": [ /.*TRUMP.*/ ] }
});
readSample(2)
.bind({})
.then(info => {
return tp.parseOne(info);
}).then(result => {
return tp.resolveAliases(result);
}).then(result => {
this.result = result;
return readExpected(2);
}).then(expected => {
this.result.should.eql(JSON.parse(expected));
done();
})
.catch(e => done(e));
});
it('should return unchanged data if aliases are not set', function(done) {

@@ -261,5 +315,10 @@ const tp = new TranscriptParser({aliases: {}});

this.parsed = parsed;
//With callback
return Promise.fromCallback(cb => tp.resolveAliases(parsed, cb));
}).then(resolved => {
this.parsed.should.equal(resolved);
//With Promise
return tp.resolveAliases(this.parsed);
}).then(resolved => {
this.parsed.should.equal(resolved);
done();

@@ -269,2 +328,20 @@ })

});
it('should handle errors properly', function(done) {
const tp = new TranscriptParser({
aliases: { "DONALD TRUMP": [ /.*TRUMP.*/ ] }
});
tp.resolveAliases(null).then( output => {
should.not.exist(output);
}).catch(err => {
should.exist(err);
}).finally(() => {
tp.resolveAliases(null, (err, output) => {
should.exist(err);
should.not.exist(output);
done();
});
});
});
});

@@ -271,0 +348,0 @@

@@ -1,2 +0,6 @@

"use strict";
'use strict';
/***********************
* Test dependencies
***********************/
const TranscriptParser = require('../app.js');

@@ -7,2 +11,5 @@ const chai = require('chai');

/***********************
* Tests
***********************/
describe('TranscriptParser', function() {

@@ -9,0 +16,0 @@ const transcriptParser = new TranscriptParser();

@@ -9,2 +9,3 @@ FREDERICK RYAN JR., WASHINGTON POST PUBLISHER: Mr. Trump, welcome to the Washington Post...

FRED HIATT, WASHINGTON POST EDITORIAL PAGE EDITOR: Do you want to start out?
FRED HIATT, WASHINGTON POST EDITORIAL PAGE EDITOR: Do you want to start out?
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc