transcript-parser
Advanced tools
Comparing version 0.0.3 to 0.1.0
39
app.js
@@ -18,3 +18,4 @@ "use strict"; | ||
removeTimestamps: true, //Overridden by removeAnnotations | ||
removeUnknownSpeaker: false | ||
removeUnknownSpeaker: false, | ||
aliases: {} | ||
}; | ||
@@ -73,3 +74,39 @@ this.settings = _.assign(this.defaultSettings, options); | ||
proto.resolveAliases = function(data) { | ||
var aliases = this.settings.aliases; | ||
if(!aliases) return; | ||
var transcript = data.speaker; | ||
for(var speaker in transcript) { | ||
for(var trueName in aliases) { | ||
for(var aliasKey in aliases[trueName]) { | ||
var alias = aliases[trueName][aliasKey]; | ||
//If the regex matches | ||
transcript[trueName] = transcript[trueName] ? | ||
_.concat(transcript[trueName], transcript[speaker]) : | ||
transcript[trueName] = transcript[speaker]; | ||
delete transcript[speaker]; | ||
break; | ||
} | ||
} | ||
} | ||
data.order = data.order.map(speaker => { | ||
for(trueName in aliases) { | ||
for(var aliasKey in aliases[trueName]) { | ||
if(speaker.search(aliases[trueName][aliasKey]) !== -1) { | ||
return trueName; | ||
} | ||
} | ||
} | ||
return speaker; | ||
}); | ||
return data; | ||
}; | ||
module.exports = TranscriptParser; |
{ | ||
"name": "transcript-parser", | ||
"version": "0.0.3", | ||
"version": "0.1.0", | ||
"description": "Parses plaintext speech/debate/radio transcripts into JavaScript objects.", | ||
@@ -5,0 +5,0 @@ "main": "app.js", |
"use strict"; | ||
const Promise = require('bluebird'); | ||
const path = require('path'); | ||
const fs = Promise.promisifyAll(require('fs')); | ||
@@ -8,4 +9,8 @@ const TranscriptParser = require('../app.js'); | ||
const TEST_DIR = path.join(__dirname, 'transcripts'); | ||
const EXPECTED_DIR = path.join(__dirname, 'expected'); | ||
describe('TranscriptParser', function() { | ||
describe('contructor', function() { | ||
@@ -34,7 +39,10 @@ it('should remove actions by default', function() { | ||
const tp = new TranscriptParser(); | ||
it('should parse a transcript with no errors', function(done) { | ||
fs.readFileAsync('test/transcripts/sample_1.txt', {encoding: 'UTF-8'}) | ||
it('should parse a transcript correctly', function(done) { | ||
readSample(1) | ||
.bind({}) | ||
.then(info => { | ||
const result = tp.parseOne(info); | ||
result.should.not.equal(false); | ||
this.result = tp.parseOne(info); | ||
return readExpected(1); | ||
}).then(expected => { | ||
this.result.should.be.eql(JSON.parse(expected)); | ||
done(); | ||
@@ -45,2 +53,30 @@ }) | ||
}); | ||
}); | ||
describe('#resolveAliases()', function () {
  // Any speaker name containing "TRUMP" should be folded into "DONALD TRUMP".
  const tp = new TranscriptParser({
    aliases: { "DONALD TRUMP": [ /.*TRUMP.*/ ] }
  });
  it('should resolve aliases correctly', function(done) {
    // Kept in a local rather than on `this`: the handlers below are arrow
    // functions, so a promise-level `.bind({})` would not change their `this`
    // and the value would end up on the mocha context instead.
    let result;
    readSample(2)
      .then(info => {
        result = tp.resolveAliases(tp.parseOne(info));
        return readExpected(2);
      }).then(expected => {
        result.should.eql(JSON.parse(expected));
        done();
      })
      .catch(e => done(e));
  });
});
}); | ||
/**
 * Reads a sample transcript from the test transcripts directory (TEST_DIR).
 *
 * @param {string|number} sampleName - File name without the ".txt" extension.
 * @returns {Promise} Whatever `fs.readFileAsync` yields for that file when
 *   asked for UTF-8 text.
 */
function readSample(sampleName) {
  const fileName = sampleName + '.txt';
  const samplePath = path.join(TEST_DIR, fileName);
  const readOptions = {encoding: 'UTF-8'};
  return fs.readFileAsync(samplePath, readOptions);
}
/**
 * Reads an expected-output fixture from the expected directory (EXPECTED_DIR).
 *
 * @param {string|number} expectedName - File name without the ".txt" extension.
 * @returns {Promise} Whatever `fs.readFileAsync` yields for that file when
 *   asked for UTF-8 text.
 */
function readExpected(expectedName) {
  const fileName = expectedName + '.txt';
  const expectedPath = path.join(EXPECTED_DIR, fileName);
  const readOptions = {encoding: 'UTF-8'};
  return fs.readFileAsync(expectedPath, readOptions);
}
211377
17
210