@@ -1,226 +0,3 @@
		"use strict";
		/***********************
		* Module dependencies
		***********************/
		// const S = require('string');
		const _ = require('lodash');
		const Promise = require('bluebird');
		'use strict';


		/***********************
		* Object creation
		***********************/
		/**
		 * Creates a transcript parser.
		 *
		 * @param {Object} [options] - Overrides for the default settings.
		 * @param {boolean} [options.removeActions=true] - Strip actions such as "(APPLAUSE)".
		 * @param {boolean} [options.removeAnnotations=true] - Strip bracketed annotations such as "[inaudible]".
		 * @param {boolean} [options.removeTimestamps=true] - Strip "[hh:mm:ss]" timestamps; only consulted when removeAnnotations is false.
		 * @param {boolean} [options.removeUnknownSpeakers=false] - Drop lines that appear before any speaker has been identified.
		 * @param {Object} [options.aliases={}] - Map of canonical speaker name to a list of RegExps matching that speaker's aliases.
		 */
		const TranscriptParser = function (options) {
			this.defaultSettings = {
				removeActions: true,
				removeAnnotations: true,
				removeTimestamps: true, //Overridden by removeAnnotations
				removeUnknownSpeakers: false,
				aliases: {}
			};
			//Merge into a fresh object so the defaults are not overwritten by the caller's options
			this.settings = Object.assign({}, this.defaultSettings, options || {});
			this.regex = {
				newLine: /\r?\n/,
				action: /\([A-Z\ ]+\)\ ?/,
				speaker: /^((?:\[\d{1,2}:\d{1,2}:\d{1,2}\]\ ?)?[A-Z\d\ \/,.\-\(\)]+)(?: \[.+\])?:\ ?/,
				timestamp: /\[\d{1,2}:\d{1,2}:\d{1,2}\]\ ?/,
				annotation: /\[.+?\]\ ?/
			};
		};

		//Shorthand used below to attach the parser's methods to the prototype
		const proto = TranscriptParser.prototype;
		//NOTE(review): `tp` captures the top-level `this` and is not referenced anywhere
		//in the visible code; confirm nothing else depends on it before removing
		const tp = this;

		/***********************
		* Synchronous parseOne method
		***********************/
		/**
		 * Synchronously parses a single transcript.
		 *
		 * Blank lines are dropped, then actions / annotations / timestamps are
		 * stripped according to the parser settings, and every remaining line is
		 * attributed to the most recently seen speaker ('none' until one is found).
		 *
		 * @param {string} transcript - Raw transcript text.
		 * @returns {{speaker: Object, order: string[]}} `speaker` maps each speaker
		 *   name to the list of their lines; `order` lists the speaker of every kept
		 *   line, in order of appearance.
		 */
		proto.parseOneSync = function(transcript) {
			const settings = this.settings;
			const regex = this.regex;
			//Splitting on a pattern and re-joining removes every occurrence of it
			const strip = (lines, pattern) => lines.map(line => line.split(pattern).join(''));

			//Remove blank lines
			let lines = transcript.split(regex.newLine).filter(line => line.length > 0);
			if(settings.removeActions) {
				lines = strip(lines, regex.action);
			}
			if(settings.removeAnnotations) {
				//The annotation pattern also matches timestamps, so both are removed here
				lines = strip(lines, regex.annotation);
			} else if(settings.removeTimestamps) {
				lines = strip(lines, regex.timestamp);
			}

			const output = {
				//Object containing the speakers and their lines
				speaker: {},
				//List of the speakers, in order
				order: []
			};

			//Current speaker
			let speaker = 'none';

			lines.forEach(line => {
				let text = line;
				//Run the speaker pattern once and reuse the match
				const match = regex.speaker.exec(line);
				if(match) {
					speaker = match[1];
					//Remove the speaker from the line
					text = line.replace(regex.speaker, '');
				}
				//Skip lines without a known speaker when configured to do so
				if(speaker === 'none' && settings.removeUnknownSpeakers) return;
				//Own-property check so inherited Object.prototype keys never count as speakers
				if(!Object.prototype.hasOwnProperty.call(output.speaker, speaker)) {
					output.speaker[speaker] = [];
				}
				output.speaker[speaker].push(text);
				output.order.push(speaker);
			});
			return output;
		};

		/***********************
		* Asynchronous parseOne method
		***********************/
		/**
		 * Asynchronously parses a single transcript.
		 *
		 * Applies the same steps as the synchronous parser: drops blank lines,
		 * strips actions / annotations / timestamps according to the settings, and
		 * attributes every remaining line to the most recently seen speaker.
		 *
		 * @param {string} transcript - Raw transcript text.
		 * @param {Function} [cb] - Optional node-style callback `(err, output)`.
		 * @returns {Promise} Without a callback, resolves with the parsed output
		 *   `{speaker, order}` or rejects with the error. With a callback, the
		 *   result is delivered through the callback and the promise resolves with
		 *   no value.
		 */
		proto.parseOne = function(transcript, cb) {
			const settings = this.settings;
			const regex = this.regex;
			//Splitting on a pattern and re-joining removes every occurrence of it
			const strip = (lines, pattern) => lines.map(line => line.split(pattern).join(''));

			//Everything runs inside the chain so that a bad transcript (e.g. null)
			//becomes a rejection instead of a synchronous throw
			const parsed = Promise.resolve().then(() => {
				//Remove blank lines
				let lines = transcript.split(regex.newLine).filter(line => line.length > 0);
				if(settings.removeActions) {
					lines = strip(lines, regex.action);
				}
				if(settings.removeAnnotations) {
					//The annotation pattern also matches timestamps, so both are removed here
					lines = strip(lines, regex.annotation);
				} else if(settings.removeTimestamps) {
					lines = strip(lines, regex.timestamp);
				}

				const output = {
					//Object containing the speakers and their lines
					speaker: {},
					//List of the speakers, in order
					order: []
				};

				//Current speaker
				let speaker = 'none';

				lines.forEach(line => {
					let text = line;
					const match = regex.speaker.exec(line);
					if(match) {
						speaker = match[1];
						//Remove the speaker from the line
						text = line.replace(regex.speaker, '');
					}
					//Skip lines without a known speaker when configured to do so
					if(speaker === 'none' && settings.removeUnknownSpeakers) return;
					if(!Object.prototype.hasOwnProperty.call(output.speaker, speaker)) {
						output.speaker[speaker] = [];
					}
					output.speaker[speaker].push(text);
					output.order.push(speaker);
				});
				return output;
			});

			//Promise style: hand the result straight back to the caller
			if(typeof cb !== 'function') return parsed;

			//Callback style: the two-argument then() guarantees the callback runs
			//exactly once, even if the callback itself throws
			return parsed.then(output => {
				cb(null, output);
			}, err => {
				cb(err);
			});
		};

		/***********************
		* Synchronous resolveAliases method
		***********************/
		/**
		 * Synchronously merges aliased speakers into their canonical names.
		 *
		 * `settings.aliases` maps a canonical name to a list of RegExps. Every
		 * speaker whose name matches one of them has their lines appended to the
		 * canonical speaker and their own key removed; `data.order` is rewritten
		 * with the canonical names. When several alias groups match the same
		 * name, the first group (in key order) wins.
		 *
		 * @param {{speaker: Object, order: string[]}} data - Parsed transcript;
		 *   it is modified in place.
		 * @returns {{speaker: Object, order: string[]}} The same `data` object.
		 */
		proto.resolveAliasesSync = function(data) {
			const aliases = this.settings.aliases;
			if(aliases == null || Object.keys(aliases).length === 0) return data;
			const speakers = data.speaker;

			//Returns the canonical name for `name`, or `name` itself when no alias matches.
			//search() is used because, unlike test(), it is unaffected by a regex's lastIndex.
			const canonicalName = name => {
				for(const trueName of Object.keys(aliases)) {
					const patterns = aliases[trueName];
					const matched = Object.keys(patterns)
						.some(key => name.search(patterns[key]) !== -1);
					if(matched) return trueName;
				}
				return name;
			};

			//Iterate over a snapshot of the names because keys are added and removed below
			Object.keys(speakers).forEach(name => {
				const trueName = canonicalName(name);
				//Already canonical (or not aliased): merging it into itself would
				//duplicate its lines and then delete them
				if(trueName === name) return;
				//Add the lines to the canonical speaker if it exists, otherwise reuse the list
				speakers[trueName] = speakers[trueName] ?
					speakers[trueName].concat(speakers[name]) :
					speakers[name];
				//Delete the old key
				delete speakers[name];
			});

			//Fix the names in the order array
			data.order = data.order.map(name => canonicalName(name));

			return data;
		};

		/***********************
		* Asynchronous resolveAliases method
		***********************/
		/**
		 * Asynchronously merges aliased speakers into their canonical names.
		 *
		 * `settings.aliases` maps a canonical name to a list of RegExps. Every
		 * speaker whose name matches one of them has their lines appended to the
		 * canonical speaker and their own key removed; `data.order` is rewritten
		 * with the canonical names. When several alias groups match the same
		 * name, the first group (in key order) wins. With no aliases configured
		 * the data is passed through unchanged.
		 *
		 * @param {{speaker: Object, order: string[]}} data - Parsed transcript;
		 *   it is modified in place.
		 * @param {Function} [cb] - Optional node-style callback `(err, data)`.
		 * @returns {Promise} Without a callback, resolves with `data` or rejects
		 *   with the error. With a callback, the result is delivered through the
		 *   callback and the promise resolves with no value.
		 */
		proto.resolveAliases = function(data, cb) {
			//Everything runs inside the chain so that bad input (e.g. null data)
			//becomes a rejection instead of a synchronous throw
			const resolved = Promise.resolve().then(() => {
				const aliases = this.settings.aliases;
				if(aliases == null || Object.keys(aliases).length === 0) return data;
				const speakers = data.speaker;

				//Returns the canonical name for `name`, or `name` itself when no alias matches.
				//search() is used because, unlike test(), it is unaffected by a regex's lastIndex.
				const canonicalName = name => {
					for(const trueName of Object.keys(aliases)) {
						const patterns = aliases[trueName];
						const matched = Object.keys(patterns)
							.some(key => name.search(patterns[key]) !== -1);
						if(matched) return trueName;
					}
					return name;
				};

				//Iterate over a snapshot of the names because keys are added and removed below
				Object.keys(speakers).forEach(name => {
					const trueName = canonicalName(name);
					//Already canonical (or not aliased): merging it into itself would
					//duplicate its lines and then delete them
					if(trueName === name) return;
					//Add the lines to the canonical speaker if it exists, otherwise reuse the list
					speakers[trueName] = speakers[trueName] ?
						speakers[trueName].concat(speakers[name]) :
						speakers[name];
					//Delete the old key
					delete speakers[name];
				});

				//Fix the names in the order array (updated in place)
				data.order.forEach((name, index) => {
					data.order[index] = canonicalName(name);
				});

				return data;
			});

			//Promise style: hand the result straight back to the caller
			if(typeof cb !== 'function') return resolved;

			//Callback style: the two-argument then() guarantees the callback runs
			//exactly once, even if the callback itself throws
			return resolved.then(result => {
				cb(null, result);
			}, err => {
				cb(err);
			});
		};

		module.exports = TranscriptParser;
		module.exports = require('./lib/parser');

package.json

		{
		"name": "transcript-parser",
		"version": "0.2.0",
		"version": "0.3.0",
		"description": "Parses plaintext speech/debate/radio transcripts into JavaScript objects.",
		@@ -8,3 +8,4 @@ "main": "app.js",
		"test": "node ./node_modules/mocha/bin/mocha",
		"travis-test": "node ./node_modules/istanbul/lib/cli.js cover node_modules/mocha/bin/_mocha"
		"travis-test": "node ./node_modules/istanbul/lib/cli.js cover node_modules/mocha/bin/_mocha",
		"benchmark": "node ./benchmark/benchmark.js"
		},
		@@ -11,0 +12,0 @@ "repository": {

153

test/parser.js

		@@ -1,2 +0,6 @@
		"use strict";
		'use strict';

		/***********************
		* Test dependencies
		***********************/
		const Promise = require('bluebird');
		@@ -7,3 +11,3 @@ const path = require('path');
		const chai = require('chai');
		chai.should();
		const should = chai.should();

		@@ -13,3 +17,5 @@ const TEST_DIR = path.join(__dirname, 'transcripts');


		/***********************
		* Tests
		***********************/
		describe('TranscriptParser', function() {
		@@ -112,12 +118,12 @@
		const parser = new TranscriptParser({removeActions: false});
		var result = parser.parseOne('PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)',
		function(err, result) {
		if(err) return done(err);
		result.speaker.should.eql({
		'PERSON A': [
		'Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)'
		]
		parser.parseOne('PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)',
		(err, result) => {
		if(err) return done(err);
		result.speaker.should.eql({
		'PERSON A': [
		'Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)'
		]
		});
		done();
		});
		done();
		});
		});
		@@ -127,11 +133,11 @@
		const parser = new TranscriptParser({removeAnnotations: false, removeTimestamps: false});
		var result = parser.parseOne('[20:20:34] BERMAN: [2:1:41] The...',
		function(err, result) {
		if(err) return done(err);
		result.speaker.should.eql({
		'[20:20:34] BERMAN': [
		'[2:1:41] The...'
		]
		});
		done();
		parser.parseOne('[20:20:34] BERMAN: [2:1:41] The...',
		(err, result) => {
		if(err) return done(err);
		result.speaker.should.eql({
		'[20:20:34] BERMAN': [
		'[2:1:41] The...'
		]
		});
		done();
		});
		@@ -142,11 +148,9 @@ });
		const parser = new TranscriptParser({removeAnnotations: false, removeTimestamps: true});
		var result = parser.parseOne('[20:20:34] BERMAN: [2:1:41] The [first] name...',
		function(err, result) {
		if(err) return done(err);
		result.speaker.should.eql({
		'BERMAN': [
		'The [first] name...'
		]
		});
		done();
		parser.parseOne('[20:20:34] BERMAN: [2:1:41] The [first] name...',
		(err, result) => {
		if(err) return done(err);
		result.speaker.should.eql({
		'BERMAN': ['The [first] name...']
		});
		done();
		});
		@@ -157,10 +161,10 @@ });
		const parser = new TranscriptParser({removeUnknownSpeakers: true});
		var result = parser.parseOne('The quick [brown] fox jumps over the (lazy) dog.',
		function(err, result) {
		if(err) return done(err);
		result.should.eql({
		speaker: {},
		order: []
		});
		done();
		parser.parseOne('The quick [brown] fox jumps over the (lazy) dog.',
		(err, result) => {
		if(err) return done(err);
		result.should.eql({
		speaker: {},
		order: []
		});
		done();
		});
		@@ -187,2 +191,32 @@ });

		//parseOne must hand back a promise resolving with the parsed output when no callback is given
		it('should return a promise when callback is not set', function(done) {
			//Kept in a local: arrow functions ignore a bound `this`, so storing the
			//value on `this` would write to the enclosing test context instead
			let parsed;
			readSample(1)
			.then(info => tp.parseOne(info))
			.then(result => {
				parsed = result;
				return readExpected(1);
			}).then(expected => {
				parsed.should.be.eql(JSON.parse(expected));
				done();
			})
			.catch(e => done(e));
		});

		//parseOne(null) must fail through both the promise and the callback interface
		it('should handle errors properly', function(done) {
			tp.parseOne(null).then(() => {
				//A fulfilled promise means no error was reported; fail explicitly
				//rather than letting a later catch absorb the assertion
				throw new Error('parseOne(null) should have rejected');
			}, err => {
				should.exist(err);
			}).then(() => {
				tp.parseOne(null, function(err, output) {
					//Forward assertion failures to mocha instead of throwing inside the callback
					let failure;
					try {
						should.exist(err);
						should.not.exist(output);
					} catch(e) {
						failure = e;
					}
					done(failure);
				});
			}).catch(e => done(e));
		});

		});
		@@ -252,2 +286,22 @@

		//resolveAliases must hand back a promise resolving with the data when no callback is given
		it('should return a promise when callback is not set', function(done) {
			const tp = new TranscriptParser({
				aliases: { "DONALD TRUMP": [ /.TRUMP./ ] }
			});
			//Kept in a local: arrow functions ignore a bound `this`, so storing the
			//value on `this` would write to the enclosing test context instead
			let resolved;
			readSample(2)
			.then(info => tp.parseOne(info))
			.then(result => tp.resolveAliases(result))
			.then(result => {
				resolved = result;
				return readExpected(2);
			}).then(expected => {
				resolved.should.eql(JSON.parse(expected));
				done();
			})
			.catch(e => done(e));
		});

		it('should return unchanged data if aliases are not set', function(done) {
		@@ -261,5 +315,10 @@ const tp = new TranscriptParser({aliases: {}});
		this.parsed = parsed;
		//With callback
		return Promise.fromCallback(cb => tp.resolveAliases(parsed, cb));
		}).then(resolved => {
		this.parsed.should.equal(resolved);
		//With Promise
		return tp.resolveAliases(this.parsed);
		}).then(resolved => {
		this.parsed.should.equal(resolved);
		done();
		@@ -269,2 +328,20 @@ })
		});

		//resolveAliases(null) must fail through both the promise and the callback interface
		it('should handle errors properly', function(done) {
			const tp = new TranscriptParser({
				aliases: { "DONALD TRUMP": [ /.TRUMP./ ] }
			});
			tp.resolveAliases(null).then(() => {
				//A fulfilled promise means no error was reported; fail explicitly
				//rather than letting a later catch absorb the assertion
				throw new Error('resolveAliases(null) should have rejected');
			}, err => {
				should.exist(err);
			}).then(() => {
				tp.resolveAliases(null, (err, output) => {
					//Forward assertion failures to mocha instead of throwing inside the callback
					let failure;
					try {
						should.exist(err);
						should.not.exist(output);
					} catch(e) {
						failure = e;
					}
					done(failure);
				});
			}).catch(e => done(e));
		});

		});
		@@ -271,0 +348,0 @@

test/regex.js

		@@ -1,2 +0,6 @@
		"use strict";
		'use strict';

		/***********************
		* Test dependencies
		***********************/
		const TranscriptParser = require('../app.js');
		@@ -7,2 +11,5 @@ const chai = require('chai');

		/***********************
		* Tests
		***********************/
		describe('TranscriptParser', function() {
		@@ -9,0 +16,0 @@ const transcriptParser = new TranscriptParser();

test/transcripts/2.txt

		@@ -9,2 +9,3 @@ FREDERICK RYAN JR., WASHINGTON POST PUBLISHER: Mr. Trump, welcome to the Washington Post...

		FRED HIATT, WASHINGTON POST EDITORIAL PAGE EDITOR: Do you want to start out?
		FRED HIATT, WASHINGTON POST EDITORIAL PAGE EDITOR: Do you want to start out?

transcript-parser - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics