Socket
Socket
Sign in · Demo · Install

transcript-parser

Package Overview
Dependencies
3
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.0.1 to 0.0.2

38

app.js

@@ -15,3 +15,6 @@ "use strict";

this.defaultSettings = {
removeActions: true
removeActions: true,
removeAnnotations: true,
removeTimestamps: true, //Overriden by removeAnnotations
removeUnknownSpeaker: false
};

@@ -21,4 +24,4 @@ this.settings = _.assign(this.defaultSettings, options);

newLine: /\r?\n/,
newLineOrAction: /(?:\r?\n|\([A-Z\ ]+\))/,
speaker: /^([A-Z\d\ \/,.\-\(\)]+)(?: \[.+\])?:/,
action: /\([A-Z\ ]+\)\ ?/,
speaker: /^([A-Z\d\ \/,.\-\(\)]+)(?: \[.+\])?:\ ?/,
timestamp: /\[\d{1,2}:\d{1,2}:\d{1,2}\]\ ?/,

@@ -33,12 +36,21 @@ annotation: /\[.+?\]\ ?/

proto.parseOne = function(transcript) {
const lines = transcript.split(this.settings.removeActions?
this.regex.newLineOrAction : this.regex.newLine)
//Remove blank lines
.filter(line => line.length > 0)
var lines = transcript.split(this.regex.newLine)
.filter(line => line.length > 0); //Remove blank lines
lines = (this.settings.removeActions) ? lines.map(line => line.split(this.regex.action).join('')): lines;
if(this.settings.removeAnnotations) {
//Remove annotations
.map(line => line.split(this.regex.annotation).join(''));
lines = lines.map(line => line.split(this.regex.annotation).join(''));
} else if(this.settings.removeTimestamps) {
//Remove timestamps
lines = lines.map(line => line.split(this.regex.timestamp).join(''));
}
//Output object
const output = {};
//Object containing the speakers and their lines
output.speaker = {};
//List of the speakers, in order
output.order = [];
//Current speaker
var speaker = 'none';

@@ -51,8 +63,10 @@

}
if(!(speaker in output.speaker)) {
if(!(speaker in output.speaker) &&
(!this.settings.removeUnknownSpeaker || speaker !== 'none')) {
output.speaker[speaker] = [];
}
output.speaker[speaker].push(lines[i]);
output.order.push(speaker);
if(!this.settings.removeUnknownSpeaker || speaker !== 'none') {
output.speaker[speaker].push(lines[i]);
output.order.push(speaker);
}
}

@@ -59,0 +73,0 @@ return output;

{
"name": "transcript-parser",
"version": "0.0.1",
"version": "0.0.2",
"description": "Parses plaintext speech/debate/radio transcripts into JavaScript objects.",

@@ -5,0 +5,0 @@ "main": "app.js",

transcript-parser
=================
[![Build Status](https://travis-ci.org/willshiao/transcript-parser.svg?branch=master)](https://travis-ci.org/willshiao/transcript-parser)
Parses plaintext speech/debate/radio transcripts into JavaScript objects.
## Description
Parses plaintext speech/debate/radio transcripts into JavaScript objects. It is still in early development and is not stable. Pull requests are welcome.
## Usage
`npm install transcript-parser`
```js
const fs = require('fs');
const TranscriptParser = require('transcript-parser');
const tp = new TranscriptParser();
//Do not use readFileSync in production
const output = tp.parseOne(fs.readFileSync('transcript.txt', {encoding: 'UTF-8'}));
console.log(output);
```
## Config
The constructor for `TranscriptParser` accepts an options argument.
Options:
- removeActions
    + default: `true`
    + Specifies if the parser should remove actions (e.g. "(APPLAUSE)").
- removeAnnotations
    + default: `true`
    + Specifies if the parser should remove annotations (surrounded by `[]`).
- removeTimestamps
    + default: `true`
    + **Has no separate effect when `removeAnnotations` is `true`**: timestamps are bracketed, so removing annotations already removes them. This option only matters when `removeAnnotations` is `false`.
    + Specifies if the parser should remove timestamps (in the `[##:##:##]` format).
- removeUnknownSpeaker
    + default: `false`
    + Specifies if the parser should remove lines that have no associated speaker.
## Documentation
### .parseOne()
The `parseOne()` method parses a string and returns an object representing it.
#### Syntax
`tp.parseOne(_transcript_)`
##### Parameters
- `transcript`
    - The transcript, as a `string`.

@@ -9,4 +9,26 @@ "use strict";

describe('TranscriptParser', function() {
const tp = new TranscriptParser();
describe('#parse()', function(){
describe('contructor', function() {
it('should remove actions by default', function() {
const tp = new TranscriptParser();
var result = tp.parseOne('PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)');
result.speaker.should.eql({
'PERSON A': [
'Hello, my name is Bob.'
]
});
});
it('should respect the removeActions setting', function() {
const tp = new TranscriptParser({removeActions: false});
var result = tp.parseOne('PERSON A: Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)');
result.speaker.should.eql({
'PERSON A': [
'Hello, (PAUSES) (DRINKS WATER) my name is Bob.(APPLAUSE)'
]
});
});
});
describe('#parseOne()', function(){
const tp = new TranscriptParser();
it('should parse a transcript with no errors', function(done) {

@@ -13,0 +35,0 @@ fs.readFileAsync('test/transcripts/sample_1.txt', {encoding: 'UTF-8'})

"use strict";
const TranscriptParser = require('../app.js');
const chai = require('chai');
chai.should();
describe('TranscriptParser', function() {

@@ -19,10 +19,6 @@ const transcriptParser = new TranscriptParser();

describe('.newLineOrAction', function() {
it('should split newlines', function() {
const testStr = 'a\nb\r\nc';
testStr.split(regex.newLineOrAction).should.eql(['a','b','c']);
});
describe('.action', function() {
it('should split actions', function() {
const testStr = 'The(LOUD APPLAUSE)chicken(SILENCE)crossed(LAUGHTER)';
testStr.split(regex.newLineOrAction).should.eql(['The','chicken','crossed','']);
const testStr = 'The (LOUD APPLAUSE) chicken (SILENCE) crossed (LAUGHTER)';
testStr.split(regex.action).should.eql(['The ','chicken ','crossed ','']);
});

@@ -29,0 +25,0 @@ });

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Packages

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc