transcript-parser
Advanced tools
Comparing version 0.7.0 to 0.7.1
@@ -19,3 +19,3 @@ 'use strict'; | ||
removeAnnotations: true, | ||
removeTimestamps: true, //Overriden by removeAnnotations | ||
removeTimestamps: true, // Overridden by removeAnnotations | ||
removeUnknownSpeakers: false, | ||
@@ -30,9 +30,11 @@ aliases: {}, | ||
}, | ||
conciseSpeakers: false, | ||
blacklist: [] | ||
}; | ||
this.settings = _.assign(this.defaultSettings, options); | ||
this.settings = {}; | ||
_.defaultsDeep(this.settings, options, this.defaultSettings); | ||
this.regex = this.settings.regex; | ||
}; | ||
//Expose the object | ||
// Expose the object | ||
exports = module.exports = TranscriptParser; | ||
@@ -60,11 +62,11 @@ | ||
//Output object | ||
// Output object | ||
const output = {}; | ||
//Object containing the speakers and their lines | ||
// Object containing the speakers and their lines | ||
output.speaker = {}; | ||
//List of the speakers, in order | ||
// List of the speakers, in order | ||
output.order = []; | ||
let speaker = 'none'; //Current speaker | ||
let ignore = false; //Are we ignoring the line because of a blacklisted speaker? | ||
let speaker = 'none'; // Current speaker | ||
let ignore = false; // Are we ignoring the line because of a blacklisted speaker? | ||
let match; | ||
@@ -74,5 +76,4 @@ | ||
if((match = this.regex.speaker.exec(line)) !== null) { | ||
//Regex match | ||
speaker = match[1].trim(); | ||
//Remove the speaker from the line | ||
speaker = match[1].trim(); // Get speaker from regex match | ||
// Remove the speaker from the line | ||
line = line.replace(this.regex.speaker, ''); | ||
@@ -82,12 +83,19 @@ ignore = (this.settings.blacklist.indexOf(speaker) > -1); | ||
//If speaker was blacklisted, return | ||
// If speaker was blacklisted, return | ||
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return; | ||
//If the speaker's key doesn't already exist | ||
// If the speaker's key doesn't already exist | ||
if(!(speaker in output.speaker)) { | ||
//Set the output's speaker key to a new empty array | ||
// Set the output's speaker key to a new empty array | ||
output.speaker[speaker] = []; | ||
} | ||
//Add the text to the output speaker's key and speaker name to the order array | ||
// Add the text to the output speaker's key and speaker name to the order array | ||
output.speaker[speaker].push(line); | ||
output.order.push(speaker); | ||
if(!this.settings.conciseSpeakers) { | ||
output.order.push(speaker); | ||
} else if(shouldAddNewSpeaker(output.order, speaker)) { | ||
output.order.push([speaker, 1]); // Add new speaker | ||
} else { | ||
output.order[output.order.length - 1][1]++; // Last speaker is the same, so increment count by one | ||
} | ||
}); | ||
@@ -97,2 +105,7 @@ return output; | ||
/**
 * Decides whether a line spoken by `speaker` starts a new entry in the
 * concise speaker order, or continues the run recorded in the last entry.
 *
 * In concise mode the callers store entries as `[speakerName, lineCount]`
 * pairs: they push `[speaker, 1]` when this returns true and otherwise
 * increment the count of the last entry.
 *
 * @param {Array} order - Order list built so far.
 * @param {string} speaker - Speaker of the line currently being added.
 * @returns {boolean} True when a new `[speaker, 1]` entry must be pushed,
 *   false when the last entry already belongs to `speaker`.
 */
function shouldAddNewSpeaker(order, speaker) {
  // For an empty list this is undefined, which the check below treats as "new".
  const lastEntry = order[order.length - 1];

  // Anything that is not a [speaker, count] pair cannot have its count
  // incremented by the caller, so it must never be reported as "same speaker".
  return !Array.isArray(lastEntry) || lastEntry[0] !== speaker;
}
/*********************** | ||
@@ -102,9 +115,9 @@ * Asynchronous parseOne method | ||
proto.parseOne = function(transcript, cb) { | ||
//Output object | ||
// Output object | ||
const output = {}; | ||
//Object containing the speakers and their lines | ||
// Object containing the speakers and their lines | ||
output.speaker = {}; | ||
//List of the speakers, in order | ||
// List of the speakers, in order | ||
output.order = []; | ||
let speaker = 'none'; //Current speaker | ||
let speaker = 'none'; // Current speaker | ||
let ignore = false, match = null; | ||
@@ -128,5 +141,4 @@ | ||
if((match = this.regex.speaker.exec(line)) !== null) { | ||
//Regex match | ||
speaker = match[1].trim(); | ||
//Remove the speaker from the line | ||
speaker = match[1].trim(); // Regex match | ||
// Remove the speaker from the line | ||
line = line.replace(this.regex.speaker, ''); | ||
@@ -136,12 +148,19 @@ ignore = (this.settings.blacklist.indexOf(speaker) > -1); | ||
//If speaker was blacklisted, return | ||
// If speaker was blacklisted, return | ||
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return; | ||
//If the speaker's key doesn't already exist | ||
// If the speaker's key doesn't already exist | ||
if(!(speaker in output.speaker)) { | ||
//Set the output's speaker key to a new empty array | ||
// Set the output's speaker key to a new empty array | ||
output.speaker[speaker] = []; | ||
} | ||
//Add the text to the output speaker's key and speaker name to the order array | ||
// Add the text to the output speaker's key and speaker name to the order array | ||
output.speaker[speaker].push(line); | ||
output.order.push(speaker); | ||
if(!this.settings.conciseSpeakers) { | ||
output.order.push(speaker); | ||
} else if(shouldAddNewSpeaker(output.order, speaker)) { | ||
output.order.push([speaker, 1]); // Add new speaker | ||
} else { | ||
output.order[output.order.length - 1][1]++; // Last speaker is the same, so increment count by one | ||
} | ||
}) | ||
@@ -165,16 +184,14 @@ .then(() => { | ||
_.each(regexes, (regex) => { | ||
//If the regex matches | ||
// If the regex matches | ||
if(regex.test(speakerName) && speakerName != newName) { | ||
if(newName in speakers) { | ||
//Add the lines from the regex-matched speaker | ||
//to the new speaker if the new speaker exists | ||
// Add the lines from the regex-matched speaker | ||
// to the new speaker if the new speaker exists | ||
speakers[newName] = _.concat(lines, speakers[newName]); | ||
} else { | ||
//Otherwise, make a new list | ||
speakers[newName] = lines; | ||
speakers[newName] = lines; // Otherwise, make a new list | ||
} | ||
//Delete the old key | ||
// Delete the old key | ||
delete speakers[speakerName]; | ||
//Break | ||
return false; | ||
return false; // Break | ||
} | ||
@@ -185,6 +202,6 @@ }); | ||
//Fix the names in the order array | ||
// Fix the names in the order array | ||
data.order = data.order.map(speaker => { | ||
for(var trueName in aliases) { | ||
for(var aliasKey in aliases[trueName]) { | ||
for(const trueName in aliases) { | ||
for(const aliasKey in aliases[trueName]) { | ||
if(speaker.search(aliases[trueName][aliasKey]) !== -1) { | ||
@@ -218,12 +235,11 @@ return trueName; | ||
return Promise.each(aliases[trueName], regex => { | ||
//If the regex matches | ||
// If the regex matches | ||
if(regex.test(speakerName) && speakerName != trueName) { | ||
//Add the lines from the regex-matched speaker | ||
//to the new speaker if the new speaker exists | ||
// Add the lines from the regex-matched speaker | ||
// to the new speaker if the new speaker exists | ||
speakers[trueName] = speakers[trueName] ? | ||
_.concat(speakers[speakerName], speakers[trueName]) : | ||
//Otherwise, make a new list | ||
speakers[speakerName]; | ||
//Delete the old key | ||
delete speakers[speakerName]; | ||
speakers[speakerName]; // Otherwise, make a new list | ||
delete speakers[speakerName]; // Delete the old key | ||
return; | ||
@@ -255,7 +271,7 @@ } | ||
proto.parseStream = function(inputStream, cb) { | ||
//Output object | ||
// Output object | ||
const output = {}; | ||
//Object containing the speakers and their lines | ||
// Object containing the speakers and their lines | ||
output.speaker = {}; | ||
//List of the speakers, in order | ||
// List of the speakers, in order | ||
output.order = []; | ||
@@ -274,4 +290,4 @@ | ||
if((match = this.regex.speaker.exec(line)) !== null) { | ||
speaker = match[1].trim(); //Regex match - is speaker | ||
//Remove the speaker from the line | ||
speaker = match[1].trim(); // Regex match - is speaker | ||
// Remove the speaker from the line | ||
line = line.replace(this.regex.speaker, ''); | ||
@@ -282,9 +298,9 @@ ignore = (this.settings.blacklist.indexOf(speaker) > -1); | ||
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return; | ||
//If the speaker's key doesn't already exist | ||
// If the speaker's key doesn't already exist | ||
if(!(speaker in output.speaker)) { | ||
//Set the output's speaker key to a new empty array | ||
// Set the output's speaker key to a new empty array | ||
output.speaker[speaker] = []; | ||
} | ||
//If the speaker is defined or the setting to remove undefined speakers is false | ||
//Add the text to the output speaker's key and speaker name to the order array | ||
// If the speaker is defined or the setting to remove undefined speakers is false | ||
// Add the text to the output speaker's key and speaker name to the order array | ||
output.speaker[speaker].push(line); | ||
@@ -299,4 +315,4 @@ output.order.push(speaker); | ||
//Filters a line based on the defined settings | ||
//Returns null on the line being completely removed | ||
// Filters a line based on the defined settings | ||
// Returns null on the line being completely removed | ||
proto.filterLine = function(line) { | ||
@@ -303,0 +319,0 @@ if(typeof line !== 'string') line = line.toString(); |
{ | ||
"name": "transcript-parser", | ||
"version": "0.7.0", | ||
"version": "0.7.1", | ||
"description": "Parses plaintext speech/debate/radio transcripts into JavaScript objects.", | ||
@@ -5,0 +5,0 @@ "main": "app.js", |
@@ -7,2 +7,14 @@ transcript-parser | ||
- [Description](#description) | ||
- [Usage](#usage) | ||
- [Config](#config) | ||
- [Documentation](#documentation) | ||
* [\.parseStream()](#parsestream) | ||
* [\.parseOneSync()](#parseonesync) | ||
* [\.parseOne()](#parseone) | ||
* [\.resolveAliasesSync()](#resolvealiasessync) | ||
* [\.resolveAliases()](#resolvealiases) | ||
- [Example](#example) | ||
## Description | ||
@@ -50,9 +62,7 @@ | ||
The constructor for `TranscriptParser` accepts an options argument. | ||
The constructor for `TranscriptParser` accepts a settings object. | ||
#### Options: | ||
- `removeActions` | ||
+ default: `true` | ||
+ Specifies if the parser should remove actions (e.g. "(APPLAUSE)"). | ||
+ Specifies if the parser should remove actions (e.g. `(APPLAUSE)`). | ||
- `removeAnnotations` | ||
@@ -78,3 +88,5 @@ + default: `true` | ||
Settings can be changed after object creation by changing the corresponding properties of `tp.settings`, where `tp` is an instance of `TranscriptParser`. | ||
## Documentation | ||
@@ -162,1 +174,27 @@ | ||
+ A callback to be executed on function completion or error. | ||
## Example | ||
### Input | ||
``` | ||
A: I like Node.js. | ||
A: I also like C#. | ||
B: I like Node.js too! | ||
A: I especially like the Node Package Manager. | ||
``` | ||
### Output | ||
```js | ||
{ | ||
speaker: { | ||
A: [ | ||
'I like Node.js.', | ||
'I also like C#.', | ||
'I especially like the Node Package Manager.' | ||
], | ||
B: ['I like Node.js too!'] | ||
}, | ||
order: ['A', 'A', 'B', 'A'] | ||
} | ||
``` |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
19587
283
197