Socket
Socket
Sign in | Demo | Install

transcript-parser

Package Overview
Dependencies
2
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.5.0 to 0.6.0

2

app.js
'use strict';
module.exports = require('./lib/parser');
module.exports = require('./lib/parser');

@@ -8,3 +8,3 @@ 'use strict';

const Promise = require('bluebird');
const byline = require('byline');
const readline = require('readline');

@@ -15,3 +15,3 @@

***********************/
const TranscriptParser = function (options) {
const TranscriptParser = function(options) {
options = options || {};

@@ -46,14 +46,17 @@ this.defaultSettings = {

proto.parseOneSync = function(transcript) {
var lines = transcript.split(this.regex.newLine)
.filter(line => line.length > 0); //Remove blank lines
lines = this.settings.removeActions ? lines.map(line => removeAll(line, this.regex.action)): lines;
if(this.settings.removeAnnotations) {
//Remove annotations
lines = lines.map(line => removeAll(line, this.regex.annotation));
} else if(this.settings.removeTimestamps) {
//Remove timestamps
lines = lines.map(line => removeAll(line, this.regex.timestamp));
}
lines = lines.filter(line => line.length > 0); //Remove newly blank lines
let lines = transcript.split(this.regex.newLine);
lines = _.map(lines, line => {
if(line.length <= 0) return '';
if(this.settings.removeActions)
line = removeAll(line, this.regex.action);
if(this.settings.removeAnnotations)
line = removeAll(line, this.regex.annotation);
else if(this.settings.removeTimestamps)
line = removeAll(line, this.regex.timestamp);
return line;
});
lines = _.filter(lines, line => (line.length > 0)); //Remove newly blank lines
//Output object

@@ -66,17 +69,17 @@ const output = {};

//Current speaker
var speaker = 'none';
//Are we ignoring the line because of a blacklisted speaker?
var ignore = false;
let speaker = 'none'; //Current speaker
let ignore = false; //Are we ignoring the line because of a blacklisted speaker?
let match;
for(var i = 0; i < lines.length; i++) {
if(lines[i].match(this.regex.speaker)) {
//Regex match - is speaker
speaker = this.regex.speaker.exec(lines[i])[1].trim();
_.each(lines, (line) => {
if((match = this.regex.speaker.exec(line)) !== null) {
//Regex match
speaker = match[1].trim();
//Remove the speaker from the line
lines[i] = lines[i].replace(this.regex.speaker, '');
//Ignore the speaker if he is in our blacklist
line = line.replace(this.regex.speaker, '');
ignore = (this.settings.blacklist.indexOf(speaker) > -1);
}
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) continue;
//If speaker was blacklisted, return
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return;
//If the speaker's key doesn't already exist

@@ -88,5 +91,5 @@ if(!(speaker in output.speaker)) {

//Add the text to the output speaker's key and speaker name to the order array
output.speaker[speaker].push(lines[i]);
output.speaker[speaker].push(line);
output.order.push(speaker);
}
});
return output;

@@ -99,3 +102,3 @@ };

proto.parseOne = function(transcript, cb) {
const hasCallback = (typeof cb !== 'undefined' && cb !== null);
const hasCallback = (typeof cb !== 'undefined' && cb !== null);
//Output object

@@ -107,60 +110,46 @@ const output = {};

output.order = [];
//Current speaker
var speaker = 'none';
let speaker = 'none'; //Current speaker
//Convert synchronous errors to asynchronous ones
try {
return Promise.try(() => {
//Remove blank lines
return Promise.filter(transcript.split(this.regex.newLine), line => line.length > 0)
.then(lines => {
if(this.settings.removeActions) {
return Promise.map(lines, line => removeAll(line, this.regex.action));
}
return Promise.resolve(lines);
}).then(lines => {
if(this.settings.removeAnnotations) {
//Remove annotations
return Promise.map(lines, line => removeAll(line, this.regex.annotation));
} else if(this.settings.removeTimestamps) {
//Remove timestamps
return Promise.map(lines, line => removeAll(line, this.regex.timestamp));
}
return Promise.resolve(lines);
})
.then(lines => {
//Remove newly blank lines
return Promise.filter(lines, line => line.length > 0);
})
.then(lines => {
var ignore = false;
return Promise.each(lines, (line) => {
if(line.match(this.regex.speaker)) {
//Regex match
speaker = this.regex.speaker.exec(line)[1].trim();
//Remove the speaker from the line
line = line.replace(this.regex.speaker, '');
ignore = (this.settings.blacklist.indexOf(speaker) > -1);
}
//If speaker was blacklisted, return
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return;
//If the speaker's key doesn't already exist
if(!(speaker in output.speaker)) {
//Set the output's speaker key to a new empty array
output.speaker[speaker] = [];
}
//Add the text to the output speaker's key and speaker name to the order array
output.speaker[speaker].push(line);
output.order.push(speaker);
});
}).then(() => {
if(hasCallback) cb(null, output);
return Promise.resolve(output);
}).catch(err => {
if(hasCallback) cb(err);
else return this.reject(err);
});
} catch(err) {
if(hasCallback) cb(err);
else return Promise.reject(err);
}
let lines = _.filter(transcript.split(this.regex.newLine), line => (line.length > 0));
let ignore = false, match = null;
lines = _.map(lines, line => {
if(this.settings.removeActions)
line = removeAll(line, this.regex.action);
if(this.settings.removeAnnotations)
line = removeAll(line, this.regex.annotation);
else if(this.settings.removeTimestamps)
line = removeAll(line, this.regex.timestamp);
return line;
});
lines = _.filter(lines, line => (line.length > 0)); //Remove newly blank lines
_.each(lines, (line) => {
if((match = this.regex.speaker.exec(line)) !== null) {
//Regex match
speaker = match[1].trim();
//Remove the speaker from the line
line = line.replace(this.regex.speaker, '');
ignore = (this.settings.blacklist.indexOf(speaker) > -1);
}
//If speaker was blacklisted, return
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return;
//If the speaker's key doesn't already exist
if(!(speaker in output.speaker)) {
//Set the output's speaker key to a new empty array
output.speaker[speaker] = [];
}
//Add the text to the output speaker's key and speaker name to the order array
output.speaker[speaker].push(line);
output.order.push(speaker);
});
if(hasCallback) cb(null, output);
return Promise.resolve(output);
}).catch(err => {
if(hasCallback) cb(err);
else return Promise.reject(err);
});
};

@@ -183,7 +172,7 @@

//Add the lines from the regex-matched speaker
//to the new speaker if the new speaker exists
//to the new speaker if the new speaker exists
speakers[newName] = _.concat(lines, speakers[newName]);
} else {
//Otherwise, make a new list
speakers[newName] = lines.slice();
//Otherwise, make a new list
speakers[newName] = lines;
}

@@ -198,3 +187,3 @@ //Delete the old key

});
//Fix the names in the order array

@@ -223,10 +212,8 @@ data.order = data.order.map(speaker => {

if(_.isEmpty(aliases)) {
if(hasCallback) cb(null, data);
return Promise.resolve(data);
if(hasCallback) return cb(null, data);
else return Promise.resolve(data);
}
//Convert synchronous errors to asynchronous ones
try {
return Promise.try(() => {
const speakers = data.speaker;
return Promise.all(_.keys(speakers).map(speakerName => {

@@ -241,4 +228,4 @@ return Promise.all(_.keys(aliases).map(trueName => {

_.concat(speakers[speakerName], speakers[trueName]) :
//Otherwise, make a new list
speakers[trueName] = speakers[speakerName];
//Otherwise, make a new list
speakers[speakerName];
//Delete the old key

@@ -250,28 +237,27 @@ delete speakers[speakerName];

}));
})).then(() => {
return Promise.each(data.order, (speaker, speakerIndex) => {
return Promise.all(_.map(aliases, (alias, trueName) => {
return Promise.all(_.map(alias, (regex) => {
if(speaker.search(regex) !== -1) {
data.order[speakerIndex] = trueName;
return;
}
}));
}));
}).then(() => {
return Promise.each(data.order, (speaker, speakerIndex) => {
return Promise.all(_.map(aliases, (alias, trueName) => {
return Promise.all(_.map(alias, (regex) => {
if(speaker.search(regex) !== -1) {
data.order[speakerIndex] = trueName;
return;
}
}));
});
}).then(() => {
if(hasCallback) cb(null, data);
return Promise.resolve(data);
}).catch(err => {
if(hasCallback) cb(err);
else return this.reject(err);
}));
});
} catch(err) {
}).then(() => {
if(hasCallback) cb(null, data);
return Promise.resolve(data);
}).catch(err => {
if(hasCallback) cb(err);
else return Promise.reject(err);
}
});
};
proto.parseStream = function(inputStream, cb) {
const stream = byline.createStream(inputStream);
const lineStream = readline.createInterface({
input: inputStream
});
//Output object

@@ -283,32 +269,30 @@ const output = {};

output.order = [];
var speaker = 'none';
var ignore = false;
stream.on('readable', () => {
const line = stream.read();
if(line === null) return cb(null, output);
let speaker = 'none';
let ignore = false, match;
var filteredLine = this.filterLine(line);
if(filteredLine) {
if(filteredLine.match(this.regex.speaker)) {
//Regex match - is speaker
speaker = this.regex.speaker.exec(filteredLine)[1].trim();
//Remove the speaker from the line
filteredLine = filteredLine.replace(this.regex.speaker, '');
ignore = (this.settings.blacklist.indexOf(speaker) > -1);
}
if(ignore || (speaker === 'none' && this.settings.removeSpeakers)) return;
//If the speaker's key doesn't already exist
if(!(speaker in output.speaker)) {
//Set the output's speaker key to a new empty array
output.speaker[speaker] = [];
}
//If the speaker is defined or the setting to remove undefined speakers is false
if(!this.settings.removeUnknownSpeakers) {
//Add the text to the output speaker's key and speaker name to the order array
output.speaker[speaker].push(filteredLine);
output.order.push(speaker);
}
}
lineStream.on('line', line => {
if(line === null) return;
line = this.filterLine(line);
if(!line) return;
if((match = this.regex.speaker.exec(line)) !== null) {
speaker = match[1].trim(); //Regex match - is speaker
//Remove the speaker from the line
line = line.replace(this.regex.speaker, '');
ignore = (this.settings.blacklist.indexOf(speaker) > -1);
}
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return;
//If the speaker's key doesn't already exist
if(!(speaker in output.speaker)) {
//Set the output's speaker key to a new empty array
output.speaker[speaker] = [];
}
//If the speaker is defined or the setting to remove undefined speakers is false
//Add the text to the output speaker's key and speaker name to the order array
output.speaker[speaker].push(line);
output.order.push(speaker);
}).on('close', () => {
return cb(null, output);
});

@@ -321,9 +305,10 @@

proto.filterLine = function(line) {
if(typeof line !== 'string') {line = line.toString();}
line = this.settings.removeActions ? removeAll(line, this.regex.action) : line;
if(this.settings.removeAnnotations) {
if(typeof line !== 'string') line = line.toString();
if(this.settings.removeActions)
line = removeAll(line, this.regex.action);
if(this.settings.removeAnnotations)
line = removeAll(line, this.regex.annotation);
} else if(this.settings.removeTimestamps) {
else if(this.settings.removeTimestamps)
line = removeAll(line, this.regex.timestamp);
}
if(line.length <= 0) return null;

@@ -335,2 +320,2 @@ return line;

return text.split(regex).join('');
}
}
{
"name": "transcript-parser",
"version": "0.5.0",
"version": "0.6.0",
"description": "Parses plaintext speech/debate/radio transcripts into JavaScript objects.",
"main": "app.js",
"scripts": {
"test": "node ./node_modules/mocha/bin/mocha",
"travis-test": "node ./node_modules/istanbul/lib/cli.js cover node_modules/mocha/bin/_mocha",
"test": "mocha",
"travis-test": "node ./node_modules/istanbul/lib/cli.js cover ./node_modules/mocha/bin/_mocha",
"benchmark": "node ./benchmark/benchmark.js"

@@ -30,2 +30,3 @@ },

"devDependencies": {
"benchmark": "^2.1.1",
"chai": "^3.5.0",

@@ -39,5 +40,4 @@ "chai-as-promised": "^5.3.0",

"bluebird": "^3.3.4",
"byline": "^4.2.1",
"lodash": "^4.9.0"
}
}

@@ -8,4 +8,6 @@ transcript-parser

Parses plaintext speech/debate/radio transcripts into JavaScript objects. It is still in early development and is not stable. Pull requests are welcome.
Parses plaintext speech/debate/radio transcripts into JavaScript objects. It is still in early development. Pull requests are welcome.
Tested for Node.js versions >= 4.4.6
## Usage

@@ -23,3 +25,3 @@

//Asyncronous example
//Asynchronous example
fs.readFile('transcript.txt', (err, data) => {

@@ -60,2 +62,6 @@ if(err) return console.error('Error:', err);

+ Specifies if the parser should remove lines that have no associated speaker.
+ If false, lines that have no associated speaker will be stored under the key `none`.
- `blacklist`
+ default: `[]`
+ A list of speakers (as strings) that the parser should ignore.
- `aliases`

@@ -62,0 +68,0 @@ + default: `{}`

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc