Socket
Socket
Sign inDemoInstall

transcript-parser

Package Overview
Dependencies
2
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.1.1 to 0.2.0

117

app.js
"use strict";
/***********************
Module dependencies
* Module dependencies
***********************/
// const S = require('string');
const _ = require('lodash');
const Promise = require('bluebird');
/***********************
Object creation
* Object creation
***********************/

@@ -33,4 +34,8 @@ const TranscriptParser = function (options) {

const proto = TranscriptParser.prototype;
const tp = this;
proto.parseOne = function(transcript) {
/***********************
* Synchronous parseOne method
***********************/
proto.parseOneSync = function(transcript) {
var lines = transcript.split(this.regex.newLine)

@@ -81,6 +86,67 @@ .filter(line => line.length > 0); //Remove blank lines

proto.resolveAliases = function(data) {
var aliases = this.settings.aliases;
/***********************
 * Asynchronous parseOne method
 *
 * Parses a transcript into an object of the form
 * `{ speaker: { NAME: [lines...] }, order: [NAME, ...] }`.
 *
 * @param {string} transcript - The raw transcript text.
 * @param {function(Error, Object)} [cb] - Node-style callback. It is invoked
 *   exactly once, with either an error or the parsed output.
 * @returns {Promise<Object>} A promise for the parsed output.
 ***********************/
proto.parseOne = function(transcript, cb) {
  //Output object
  const output = {
    //Object containing the speakers and their lines
    speaker: {},
    //List of the speakers, in order
    order: []
  };
  //Current speaker; stays 'none' until a line carries a speaker label
  let speaker = 'none';

  //Promise.try converts a synchronous throw (e.g. `transcript` is not a
  //string) into a rejection so that it is delivered through the callback
  return Promise.try(() => {
    //Remove blank lines
    return Promise.filter(transcript.split(this.regex.newLine), line => line.length > 0);
  }).then(lines => {
    if(this.settings.removeActions) {
      //Remove actions
      return Promise.map(lines, line => line.split(this.regex.action).join(''));
    }
    return lines;
  }).then(lines => {
    if(this.settings.removeAnnotations) {
      //Remove annotations
      return Promise.map(lines, line => line.split(this.regex.annotation).join(''));
    } else if(this.settings.removeTimestamps) {
      //Remove timestamps (only applied when annotations are being kept)
      return Promise.map(lines, line => line.split(this.regex.timestamp).join(''));
    }
    return lines;
  }).then(lines => {
    return Promise.each(lines, line => {
      if(line.match(this.regex.speaker)) {
        //Regex match
        speaker = this.regex.speaker.exec(line)[1];
        //Remove the speaker from the line
        line = line.replace(this.regex.speaker, '');
      }
      //Drop the line if no speaker is known and unknown speakers are removed
      if(speaker === 'none' && this.settings.removeUnknownSpeakers) return;
      //Own-property check: `in` would also see inherited keys such as
      //'constructor', which are not arrays and cannot be pushed to
      if(!Object.prototype.hasOwnProperty.call(output.speaker, speaker)) {
        //Set the output's speaker key to a new empty array
        output.speaker[speaker] = [];
      }
      //Add the text to the output speaker's key and speaker name to the order array
      output.speaker[speaker].push(line);
      output.order.push(speaker);
    });
  }).then(() => output)
    //asCallback calls cb once; an error thrown by cb itself is not fed back
    //into cb a second time. It is a no-op when cb is not a function.
    .asCallback(cb);
};
/***********************
* Synchronous resolveAliases method
***********************/
proto.resolveAliasesSync = function(data) {
const aliases = this.settings.aliases;
if(_.isEmpty(aliases)) return data;
var speakers = data.speaker;
const speakers = data.speaker;

@@ -122,3 +188,42 @@ for(var speaker in speakers) {

/***********************
 * Asynchronous resolveAliases method
 *
 * Merges every speaker whose name matches an alias regex into the alias'
 * canonical name, and renames the matching entries of `data.order`.
 * `data` is modified in place. When a name matches several aliases, the
 * first alias (in key order) wins, for both the speakers and the order list.
 *
 * @param {Object} data - Parsed transcript (`speaker` map and `order` list).
 * @param {function(Error, Object)} [cb] - Node-style callback. It is always
 *   invoked asynchronously and exactly once.
 * @returns {Promise<Object>} A promise for the same `data` object.
 ***********************/
proto.resolveAliases = function(data, cb) {
  const aliases = this.settings.aliases;

  //Returns the first canonical name with a regex matching `name`, or
  //undefined. String#search ignores lastIndex, so regexes carrying the
  //global flag give the same answer on every call.
  const findTrueName = name => _.findKey(aliases, regexes =>
    _.some(regexes, regex => name.search(regex) !== -1));

  return Promise.try(() => {
    if(_.isEmpty(aliases)) return data;
    const speakers = data.speaker;

    //Snapshot the keys first, since the loop adds and deletes keys
    _.keys(speakers).forEach(speakerName => {
      const trueName = findTrueName(speakerName);
      //Nothing to do without a match; a speaker that already has the
      //canonical name must not be merged into itself and then deleted
      if(trueName === undefined || trueName === speakerName) return;
      //Add the lines from the regex-matched speaker
      //to the new speaker if the new speaker exists
      speakers[trueName] = speakers[trueName] ?
        _.concat(speakers[trueName], speakers[speakerName]) :
        //Otherwise, reuse the matched speaker's list
        speakers[speakerName];
      //Delete the old key
      delete speakers[speakerName];
    });

    //Rename the entries of the order list to the canonical names
    data.order.forEach((speaker, speakerIndex) => {
      const trueName = findTrueName(speaker);
      if(trueName !== undefined) data.order[speakerIndex] = trueName;
    });

    return data;
  })
    //asCallback calls cb once; an error thrown by cb itself is not fed back
    //into cb a second time. It is a no-op when cb is not a function.
    .asCallback(cb);
};
module.exports = TranscriptParser;

2

package.json
{
"name": "transcript-parser",
"version": "0.1.1",
"version": "0.2.0",
"description": "Parses plaintext speech/debate/radio transcripts into JavaScript objects.",

@@ -5,0 +5,0 @@ "main": "app.js",

@@ -18,4 +18,4 @@ transcript-parser

//Do not use readFileSync in production
const output = tp.parseOne(fs.readFileSync('transcript.txt', {encoding: 'UTF-8'}));
//Do not use fs.readFileSync in production
const output = tp.parseOneSync(fs.readFileSync('transcript.txt', {encoding: 'UTF-8'}));
console.log(output);

@@ -52,2 +52,16 @@

### .parseOneSync()
The `parseOneSync()` method parses a string and returns an object representing it.
#### Syntax
`tp.parseOneSync(transcript)`
##### Parameters
- `transcript`
+ The transcript, as a `string`.
### .parseOne()

@@ -59,3 +73,3 @@

`tp.parseOne(transcript)`
`tp.parseOne(transcript, callback)`

@@ -65,5 +79,23 @@ ##### Parameters

- `transcript`
- The transcript, as a `string`.
+ The transcript, as a `string`.
- `callback(err, data)`
+ A callback to be executed on function completion.
### .resolveAliasesSync()
The `resolveAliasesSync()` method resolves all aliases specified in the configuration passed to the `TranscriptParser`'s constructor (see above).
Renames the names in the `order` list to match the new names in the transcript. Note that there is a significant performance penalty, so don't use this method unless you need it.
#### Syntax
`tp.resolveAliasesSync(data)`
##### Parameters
- `data`
+ The transcript object after being parsed.
### .resolveAliases()

@@ -77,3 +109,3 @@

`tp.resolveAliases(data)`
`tp.resolveAliases(data, callback)`

@@ -83,2 +115,5 @@ ##### Parameters

- `data`
- The transcript object after being parsed.
+ The transcript object after being parsed.
- `callback(err, resolved)`
+ A callback to be executed on function completion.

@@ -15,3 +15,7 @@ "use strict";

describe('#parseOne()', function(){
/*
* For the synchronous parseOne method
*
*/
describe('#parseOneSync()', function(){
const tp = new TranscriptParser();

@@ -21,3 +25,3 @@

const parser = new TranscriptParser();
var result = parser.parseOne('PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)');
var result = parser.parseOneSync('PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)');
result.speaker.should.eql({

@@ -32,3 +36,3 @@ 'PERSON A': [

const parser = new TranscriptParser({removeActions: false});
var result = parser.parseOne('PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)');
var result = parser.parseOneSync('PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)');
result.speaker.should.eql({

@@ -43,3 +47,3 @@ 'PERSON A': [

const parser = new TranscriptParser({removeAnnotations: false, removeTimestamps: false});
var result = parser.parseOne('[20:20:34] BERMAN: [2:1:41] The...');
var result = parser.parseOneSync('[20:20:34] BERMAN: [2:1:41] The...');
result.speaker.should.eql({

@@ -54,3 +58,3 @@ '[20:20:34] BERMAN': [

const parser = new TranscriptParser({removeAnnotations: false, removeTimestamps: true});
var result = parser.parseOne('[20:20:34] BERMAN: [2:1:41] The [first] name...');
var result = parser.parseOneSync('[20:20:34] BERMAN: [2:1:41] The [first] name...');
result.speaker.should.eql({

@@ -65,3 +69,3 @@ 'BERMAN': [

const parser = new TranscriptParser({removeUnknownSpeakers: true});
var result = parser.parseOne('The quick [brown] fox jumps over the (lazy) dog.');
var result = parser.parseOneSync('The quick [brown] fox jumps over the (lazy) dog.');
result.should.eql({

@@ -77,3 +81,3 @@ speaker: {},

.then(info => {
this.result = tp.parseOne(info);
this.result = tp.parseOneSync(info);
return readExpected(1);

@@ -86,6 +90,108 @@ }).then(expected => {

});
});
describe('#resolveAliases()', function () {
/*
* For the asynchronous parseOne method
*
*/
describe('#parseOne()', function(){
  const tp = new TranscriptParser();

  //Wraps the callback API in a promise. Assertions then run in the promise
  //chain, so a failed assertion reaches done() once through .catch() and
  //does not depend on how parseOne treats an error thrown by its callback.
  const parseAsync = (parser, transcript) =>
    Promise.fromCallback(cb => parser.parseOne(transcript, cb));

  it('should remove actions by default', function(done) {
    const parser = new TranscriptParser();
    parseAsync(parser, 'PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)')
      .then(result => {
        result.speaker.should.eql({
          'PERSON A': [
            'Hello, my name is Bob.'
          ]
        });
        done();
      })
      .catch(e => done(e));
  });

  it('should respect the removeActions setting', function(done) {
    const parser = new TranscriptParser({removeActions: false});
    parseAsync(parser, 'PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)')
      .then(result => {
        result.speaker.should.eql({
          'PERSON A': [
            'Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)'
          ]
        });
        done();
      })
      .catch(e => done(e));
  });

  it('should respect the removeTimestamps setting', function(done) {
    const parser = new TranscriptParser({removeAnnotations: false, removeTimestamps: false});
    parseAsync(parser, '[20:20:34] BERMAN: [2:1:41] The...')
      .then(result => {
        result.speaker.should.eql({
          '[20:20:34] BERMAN': [
            '[2:1:41] The...'
          ]
        });
        done();
      })
      .catch(e => done(e));
  });

  it('should be able to remove timestamps without removing annotations', function(done) {
    const parser = new TranscriptParser({removeAnnotations: false, removeTimestamps: true});
    parseAsync(parser, '[20:20:34] BERMAN: [2:1:41] The [first] name...')
      .then(result => {
        result.speaker.should.eql({
          'BERMAN': [
            'The [first] name...'
          ]
        });
        done();
      })
      .catch(e => done(e));
  });

  it('should respect the remove unknown speakers setting', function(done) {
    const parser = new TranscriptParser({removeUnknownSpeakers: true});
    parseAsync(parser, 'The quick [brown] fox jumps over the (lazy) dog.')
      .then(result => {
        result.should.eql({
          speaker: {},
          order: []
        });
        done();
      })
      .catch(e => done(e));
  });

  it('should parse a transcript correctly', function(done) {
    //Held in a local: arrow functions ignore .bind({}), so `this` inside
    //them would be the mocha test context
    let parsed;
    readSample(1)
      .then(info => parseAsync(tp, info))
      .then(result => {
        parsed = result;
        return readExpected(1);
      }).then(expected => {
        parsed.should.be.eql(JSON.parse(expected));
        done();
      })
      .catch(e => done(e));
  });
});
/*
* For the synchronous resolveAliases method
*
*/
describe('#resolveAliasesSync()', function () {
it('should resolve aliases correctly', function(done) {

@@ -98,4 +204,4 @@ const tp = new TranscriptParser({

.then(info => {
this.result = tp.parseOne(info);
this.result = tp.resolveAliases(this.result);
this.result = tp.parseOneSync(info);
this.result = tp.resolveAliasesSync(this.result);
return readExpected(2);

@@ -113,4 +219,4 @@ }).then(expected => {

.then(info => {
var parsed = tp.parseOne(info);
var resolved = tp.resolveAliases(parsed);
var parsed = tp.parseOneSync(info);
var resolved = tp.resolveAliasesSync(parsed);
parsed.should.equal(resolved);

@@ -121,4 +227,47 @@ done();

});
});
/*
* For the asynchronous resolveAliases method
*
*/
describe('#resolveAliases()', function () {
  it('should resolve aliases correctly', function(done) {
    const tp = new TranscriptParser({
      aliases: { "DONALD TRUMP": [ /.*TRUMP.*/ ] }
    });
    //Held in a local: arrow functions ignore .bind({}), so `this` inside
    //them would be the mocha test context
    let resolved;
    readSample(2)
      .then(info => {
        return Promise.fromCallback(cb => tp.parseOne(info, cb));
      }).then(result => {
        return Promise.fromCallback(cb => tp.resolveAliases(result, cb));
      }).then(result => {
        resolved = result;
        return readExpected(2);
      }).then(expected => {
        resolved.should.eql(JSON.parse(expected));
        done();
      })
      .catch(e => done(e));
  });

  it('should return unchanged data if aliases are not set', function(done) {
    const tp = new TranscriptParser({aliases: {}});
    //Parsed object kept for the identity comparison below
    let parsed;
    readSample(2)
      .then(info => {
        return Promise.fromCallback(cb => tp.parseOne(info, cb));
      }).then(result => {
        parsed = result;
        return Promise.fromCallback(cb => tp.resolveAliases(parsed, cb));
      }).then(resolved => {
        //Same object reference, not just a deep-equal copy
        parsed.should.equal(resolved);
        done();
      })
      .catch(e => done(e));
  });
});
});

@@ -125,0 +274,0 @@

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc