transcript-parser
Advanced tools
Comparing version 0.4.1 to 0.5.0
@@ -28,3 +28,4 @@ 'use strict'; | ||
annotation: /\[.+?\]\ ?/ | ||
} | ||
}, | ||
blacklist: [] | ||
}; | ||
@@ -54,3 +55,3 @@ this.settings = _.assign(this.defaultSettings, options); | ||
} | ||
lines = lines.filter(line => line.length > 0); //Remove newely blank lines | ||
lines = lines.filter(line => line.length > 0); //Remove newly blank lines | ||
@@ -66,2 +67,4 @@ //Output object | ||
var speaker = 'none'; | ||
//Are we ignoring the line because of a blacklisted speaker? | ||
var ignore = false; | ||
@@ -74,16 +77,14 @@ for(var i = 0; i < lines.length; i++) { | ||
lines[i] = lines[i].replace(this.regex.speaker, ''); | ||
//Ignore the speaker if he is in our blacklist | ||
ignore = (this.settings.blacklist.indexOf(speaker) > -1); | ||
} | ||
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) continue; | ||
//If the speaker's key doesn't already exist | ||
if(!(speaker in output.speaker) && | ||
//And the speaker is defined or the setting to remove undefined speakers is false | ||
(speaker !== 'none' || !this.settings.removeUnknownSpeakers)) { | ||
if(!(speaker in output.speaker)) { | ||
//Set the output's speaker key to a new empty array | ||
output.speaker[speaker] = []; | ||
} | ||
//If the speaker is defined or the setting to remove undefined speakers is false | ||
if(speaker !== 'none' || !this.settings.removeUnknownSpeakers) { | ||
//Add the text to the output speaker's key and speaker name to the order array | ||
output.speaker[speaker].push(lines[i]); | ||
output.order.push(speaker); | ||
} | ||
//Add the text to the output speaker's key and speaker name to the order array | ||
output.speaker[speaker].push(lines[i]); | ||
output.order.push(speaker); | ||
} | ||
@@ -131,2 +132,3 @@ return output; | ||
.then(lines => { | ||
var ignore = false; | ||
return Promise.each(lines, (line) => { | ||
@@ -138,16 +140,14 @@ if(line.match(this.regex.speaker)) { | ||
line = line.replace(this.regex.speaker, ''); | ||
ignore = (this.settings.blacklist.indexOf(speaker) > -1); | ||
} | ||
//If the speaker is blacklisted, or is unknown while unknown speakers are being removed, skip this line | ||
if(ignore || (speaker === 'none' && this.settings.removeUnknownSpeakers)) return; | ||
//If the speaker's key doesn't already exist | ||
if(!(speaker in output.speaker) && | ||
//And the speaker is defined or the setting to remove undefined speakers is false | ||
(speaker !== 'none' || !this.settings.removeUnknownSpeakers)) { | ||
if(!(speaker in output.speaker)) { | ||
//Set the output's speaker key to a new empty array | ||
output.speaker[speaker] = []; | ||
} | ||
//If the speaker is defined or the setting to remove undefined speakers is false | ||
if(speaker !== 'none' || !this.settings.removeUnknownSpeakers) { | ||
//Add the text to the output speaker's key and speaker name to the order array | ||
output.speaker[speaker].push(line); | ||
output.order.push(speaker); | ||
} | ||
//Add the text to the output speaker's key and speaker name to the order array | ||
output.speaker[speaker].push(line); | ||
output.order.push(speaker); | ||
}); | ||
@@ -177,3 +177,3 @@ }).then(() => { | ||
_.each(aliases, (regexes, newName) => { | ||
_.each(regexes, (regex, regexKey) => { | ||
_.each(regexes, (regex) => { | ||
//If the regex matches | ||
@@ -279,7 +279,7 @@ if(regex.test(speakerName) && speakerName != newName) { | ||
var line; | ||
var speaker = 'none'; | ||
var ignore = false; | ||
stream.on('readable', () => { | ||
const line = stream.read() | ||
const line = stream.read(); | ||
if(line === null) return cb(null, output); | ||
@@ -294,7 +294,7 @@ | ||
filteredLine = filteredLine.replace(this.regex.speaker, ''); | ||
ignore = (this.settings.blacklist.indexOf(speaker) > -1); | ||
} | ||
if(ignore || (speaker === 'none' && this.settings.removeSpeakers)) return; | ||
//If the speaker's key doesn't already exist | ||
if(!(speaker in output.speaker) && | ||
//And the speaker is defined or the setting to remove undefined speakers is false | ||
(speaker !== 'none' || !this.settings.removeUnknownSpeakers)) { | ||
if(!(speaker in output.speaker)) { | ||
//Set the output's speaker key to a new empty array | ||
@@ -304,3 +304,3 @@ output.speaker[speaker] = []; | ||
//If the speaker is defined or the setting to remove undefined speakers is false | ||
if(speaker !== 'none' || !this.settings.removeUnknownSpeakers) { | ||
if(!this.settings.removeUnknownSpeakers) { | ||
//Add the text to the output speaker's key and speaker name to the order array | ||
@@ -330,4 +330,3 @@ output.speaker[speaker].push(filteredLine); | ||
function removeAll(text, regex) { | ||
return text.split(regex).join(''); | ||
} |
{ | ||
"name": "transcript-parser", | ||
"version": "0.4.1", | ||
"version": "0.5.0", | ||
"description": "Parses plaintext speech/debate/radio transcripts into JavaScript objects.", | ||
@@ -5,0 +5,0 @@ "main": "app.js", |
@@ -12,2 +12,3 @@ 'use strict'; | ||
const should = chai.should(); | ||
const Readable = require('stream').Readable; | ||
@@ -40,2 +41,20 @@ const TEST_DIR = path.join(__dirname, 'transcripts'); | ||
}); | ||
// parseStream: text attributed to a blacklisted speaker must not appear in the
// parsed output, including follow-on lines that carry no speaker prefix.
it('should respect the blacklist setting', function() {
  const rs = new Readable();
  const parser = new TranscriptParser({blacklist: [ 'B' ]});
  // 'ignored' has no speaker prefix of its own; it follows B's line
  const testStr = 'A: Blah blah blah\nB: This should be\nignored\nA: Blah blah';
  rs.push(testStr);
  rs.push(null); // end of stream
  // Return the promise so that a failed assertion rejects the test right away
  // instead of leaving `done` uncalled until the runner times out.
  return Promise.fromCallback(cb => parser.parseStream(rs, cb))
    .then(parsed => {
      parsed.should.eql({
        speaker: {
          A: ['Blah blah blah', 'Blah blah'],
        },
        order: ['A', 'A']
      });
    });
});
}); | ||
@@ -112,2 +131,13 @@ | ||
// parseOneSync: text attributed to a blacklisted speaker must not appear in
// the parsed output, including follow-on lines without a speaker prefix.
it('should respect the blacklist setting', function() {
  const testStr = 'A: Blah blah blah\nB: This should be\nignored\nA: Blah blah';
  // Only speaker A's two lines are expected to survive
  const expected = {
    speaker: {
      A: ['Blah blah blah', 'Blah blah'],
    },
    order: ['A', 'A']
  };
  const parser = new TranscriptParser({blacklist: [ 'B' ]});
  const parsed = parser.parseOneSync(testStr);
  parsed.should.eql(expected);
});
}); | ||
@@ -238,2 +268,16 @@ | ||
// parseOne: text attributed to a blacklisted speaker must not appear in the
// parsed output, including follow-on lines without a speaker prefix.
it('should respect the blacklist setting', function() {
  const parser = new TranscriptParser({blacklist: [ 'B' ]});
  // 'ignored' has no speaker prefix of its own; it follows B's line
  const testStr = 'A: Blah blah blah\nB: This should be\nignored\nA: Blah blah';
  // Return the promise so that a failed assertion rejects the test right away
  // instead of leaving `done` uncalled until the runner times out.
  return parser.parseOne(testStr).then(parsed => {
    parsed.should.eql({
      speaker: {
        A: ['Blah blah blah', 'Blah blah'],
      },
      order: ['A', 'A']
    });
  });
});
}); | ||
@@ -240,0 +284,0 @@ |
488957
804