matroska-subtitles
Advanced tools
Comparing version 1.1.2 to 2.0.0
191
index.js
@@ -0,130 +1,135 @@ | ||
const Writable = require('stream').Writable | ||
const ebml = require('ebml') | ||
const ebmlBlock = require('ebml-block') | ||
const through = require('through2') | ||
const readElement = require('./lib/read-element') | ||
// track elements we care about | ||
const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate'] | ||
const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS'] | ||
const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text'] | ||
module.exports = function () { | ||
const subtitleTracks = new Map() | ||
const decoder = new ebml.Decoder() | ||
class MatroskaSubtitles extends Writable { | ||
constructor (prevInstance) { | ||
super() | ||
var timecodeScale = 1 | ||
var currentTrack = null | ||
var currentSubtitleBlock = null | ||
var currentClusterTimecode = null | ||
var currentTrack | ||
var currentSubtitleBlock | ||
var currentClusterTimecode | ||
this.decoder = new ebml.Decoder() | ||
decoder.on('data', function (chunk) { | ||
// Segment Information // | ||
if (chunk[1].name === 'TimecodeScale') { | ||
timecodeScale = readData(chunk) / 1000000 | ||
if (prevInstance instanceof MatroskaSubtitles) { | ||
prevInstance.end() | ||
// copy previous metadata | ||
this.subtitleTracks = prevInstance.subtitleTracks | ||
this.timecodeScale = prevInstance.timecodeScale | ||
this.decoder.on('data', _onClusterData) | ||
} else { | ||
this.subtitleTracks = new Map() | ||
this.timecodeScale = 1 | ||
this.decoder.on('data', _onMetaData) | ||
} | ||
// Clusters // | ||
var self = this | ||
// TODO: assuming this is a Cluster `Timecode` | ||
if (chunk[1].name === 'Timecode') { | ||
currentClusterTimecode = readData(chunk) | ||
} | ||
function _onMetaData (chunk) { | ||
// Segment Information | ||
if (chunk[1].name === 'TimecodeScale') { | ||
self.timecodeScale = readElement(chunk[1]) / 1000000 | ||
} | ||
// Tracks // | ||
// Tracks | ||
if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') { | ||
currentTrack = {} | ||
} | ||
if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') { | ||
currentTrack = {} | ||
} | ||
if (currentTrack && chunk[0] === 'tag') { | ||
// save info about track currently being scanned | ||
if (TRACK_ELEMENTS.includes(chunk[1].name)) { | ||
currentTrack[chunk[1].name] = readElement(chunk[1]) | ||
} | ||
} | ||
if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') { | ||
// 0x11: Subtitle Track, S_TEXT/UTF8: SRT format | ||
if (currentTrack.TrackType === 0x11) { | ||
if (currentTrack.CodecID === 'S_TEXT/UTF8' || currentTrack.CodecID === 'S_TEXT/ASS') { | ||
subtitleTracks.set(currentTrack.TrackNumber, currentTrack.CodecID) | ||
var info = { | ||
track: currentTrack.TrackNumber, | ||
language: currentTrack.Language, | ||
type: currentTrack.CodecID.substring(7) | ||
if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') { | ||
if (currentTrack.TrackType === 0x11) { // Subtitle Track | ||
if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) { | ||
var track = { | ||
number: currentTrack.TrackNumber, | ||
language: currentTrack.Language, | ||
type: currentTrack.CodecID.substring(7).toLowerCase() | ||
} | ||
if (currentTrack.CodecPrivate) { | ||
// only SSA/ASS | ||
track.header = currentTrack.CodecPrivate.toString('utf8') | ||
} | ||
self.subtitleTracks.set(currentTrack.TrackNumber, track) | ||
} | ||
if (currentTrack.CodecPrivate) { | ||
// only SSA/ASS | ||
info.header = currentTrack.CodecPrivate.toString('utf8') | ||
} | ||
stream.push(['new', info]) | ||
} | ||
currentTrack = null | ||
} | ||
currentTrack = null | ||
} | ||
if (currentTrack && chunk[0] === 'tag') { | ||
// save info about track currently being scanned | ||
if (TRACK_ELEMENTS.includes(chunk[1].name)) { | ||
currentTrack[chunk[1].name] = readData(chunk) | ||
if (chunk[0] === 'end' && chunk[1].name === 'Tracks') { | ||
self.decoder.removeListener('data', _onMetaData) | ||
if (self.subtitleTracks.size <= 0) return self.end() | ||
self.decoder.on('data', _onClusterData) | ||
self.emit('tracks', Array.from(self.subtitleTracks.values())) | ||
} | ||
} | ||
// Blocks // | ||
function _onClusterData (chunk) { | ||
// TODO: assuming this is a Cluster `Timecode` | ||
if (chunk[1].name === 'Timecode') { | ||
currentClusterTimecode = readElement(chunk[1]) | ||
} | ||
if (chunk[1].name === 'Block') { | ||
var block = ebmlBlock(chunk[1].data) | ||
if (chunk[1].name === 'Block') { | ||
var block = ebmlBlock(chunk[1].data) | ||
if (subtitleTracks.has(block.trackNumber)) { | ||
var type = subtitleTracks.get(block.trackNumber) | ||
if (self.subtitleTracks.has(block.trackNumber)) { | ||
var type = self.subtitleTracks.get(block.trackNumber).type | ||
// TODO: would a subtitle track ever use lacing? We just take the first (only) frame. | ||
var subtitle = { | ||
text: block.frames[0].toString('utf8'), | ||
time: (block.timecode + currentClusterTimecode) * timecodeScale | ||
} | ||
var subtitle = { | ||
text: block.frames[0].toString('utf8'), | ||
time: (block.timecode + currentClusterTimecode) * self.timecodeScale | ||
} | ||
if (type === 'S_TEXT/ASS') { | ||
// extract ASS keys | ||
var values = subtitle.text.split(',') | ||
// ignore read-order | ||
for (var i = 1; i < 9; i++) { | ||
subtitle[ASS_KEYS[i]] = values[i] | ||
if (type === 'ass' || type === 'ssa') { | ||
// extract SSA/ASS keys | ||
var values = subtitle.text.split(',') | ||
// ignore read-order, and skip layer if ssa | ||
var i = type === 'ssa' ? 2 : 1 | ||
for (; i < 9; i++) { | ||
subtitle[ASS_KEYS[i]] = values[i] | ||
} | ||
// re-append extra text that might have been splitted | ||
for (i = 9; i < values.length; i++) { | ||
subtitle.text += ',' + values[i] | ||
} | ||
} | ||
// re-append extra text that might have been splitted | ||
for (i = 9; i < values.length; i++) { | ||
subtitle.text += ',' + values[i] | ||
} | ||
currentSubtitleBlock = [subtitle, block.trackNumber] | ||
} | ||
currentSubtitleBlock = [block.trackNumber, subtitle] | ||
} | ||
} | ||
// TODO: assuming `BlockDuration` exists and always comes after `Block` | ||
if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') { | ||
currentSubtitleBlock[1].duration = readData(chunk) * timecodeScale | ||
// TODO: assuming `BlockDuration` exists and always comes after `Block` | ||
if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') { | ||
currentSubtitleBlock[0].duration = readElement(chunk[1]) * self.timecodeScale | ||
stream.push(currentSubtitleBlock) | ||
self.emit('subtitle', ...currentSubtitleBlock) | ||
currentSubtitleBlock = null | ||
currentSubtitleBlock = null | ||
} | ||
} | ||
}) | ||
} | ||
// create object stream | ||
var stream = through.obj(function write (chunk, _, callback) { | ||
decoder.write(chunk) | ||
callback() | ||
}) | ||
return stream | ||
_write (chunk, _, callback) { | ||
this.decoder.write(chunk) | ||
callback(null) | ||
} | ||
} | ||
/**
 * Decode the payload of an EBML decoder chunk according to its element type.
 *
 * @param {Array} chunk - decoder output of the form `[state, element]`; only
 *   `element.type`, `element.data` (a Buffer) and `element.dataSize` are read.
 * @returns {Buffer|string|number|undefined} the raw Buffer for binary ('b'),
 *   an ASCII string for 's', a UTF-8 string for '8', a number for unsigned
 *   integers ('u'), or `undefined` (after logging) for any other type.
 */
function readData (chunk) {
  const element = chunk[1]
  switch (element.type) {
    case 'b':
      return element.data
    case 's':
      return element.data.toString('ascii')
    case '8':
      return element.data.toString('utf8')
    case 'u': {
      const size = element.dataSize
      // A zero-length unsigned integer element represents the value 0.
      if (size === 0) return 0
      // Buffer#readUIntBE only accepts 1..6 bytes, while EBML unsigned
      // integers may be up to 8 bytes wide: read wider values as a high part
      // plus a 6-byte low part. Values above Number.MAX_SAFE_INTEGER lose
      // precision.
      if (size > 6) {
        const highBytes = size - 6
        const high = element.data.readUIntBE(0, highBytes)
        const low = element.data.readUIntBE(highBytes, 6)
        return high * Math.pow(2, 48) + low
      }
      return element.data.readUIntBE(0, size)
    }
    default:
      // unknown element type: report it and fall through to return undefined
      console.error('Unsupported data:', chunk)
  }
}
module.exports = MatroskaSubtitles |
{ | ||
"name": "matroska-subtitles", | ||
"version": "1.1.2", | ||
"description": "Transform stream for parsing embedded .mkv subtitles.", | ||
"version": "2.0.0", | ||
"description": "Writable stream for parsing embedded .mkv subtitles.", | ||
"main": "index.js", | ||
"dependencies": { | ||
"ebml": "^2.2.0", | ||
"ebml-block": "^1.0.0", | ||
"through2": "^2.0.1" | ||
"ebml-block": "^1.0.0" | ||
}, | ||
@@ -32,3 +31,6 @@ "devDependencies": {}, | ||
}, | ||
"homepage": "https://github.com/mathiasvr/matroska-subtitles#readme" | ||
"homepage": "https://github.com/mathiasvr/matroska-subtitles#readme", | ||
"directories": { | ||
"example": "examples" | ||
} | ||
} |
106
README.md
@@ -10,102 +10,62 @@ # matroska-subtitles [![npm][npm-img]][npm-url] [![dependencies][dep-img]][dep-url] [![license][lic-img]][lic-url] | ||
Transform stream for parsing embedded .mkv subtitles. | ||
Writable stream for parsing embedded .mkv subtitles. | ||
> Currently supports extraction of the .srt and .ass format. | ||
Supported formats: `.srt`, `.ssa`, `.ass`. | ||
## install | ||
``` | ||
```bash | ||
npm install matroska-subtitles | ||
``` | ||
## documentation | ||
## example | ||
The `data` event of the stream will emit an array that determines the type of the data. | ||
When a new subtitle track is encountered, the *track number*, *language*, *type* and optionally a *header* are emitted: | ||
``` | ||
data = [ 'new', { track: <track number>, language: <string>, type: <string>, header: <string> } ] | ||
``` | ||
Subsequently a specific subtitle track will emit data of this form: | ||
``` | ||
data = [ <track number>, { text: <string>, time: <ms>, duration: <ms> } ] | ||
``` | ||
## examples | ||
### dump all subtitles | ||
```javascript | ||
const fs = require('fs') | ||
const matroskaSubtitles = require('matroska-subtitles') | ||
const MatroskaSubtitles = require('matroska-subtitles') | ||
var subs = matroskaSubtitles() | ||
var parser = new MatroskaSubtitles() | ||
subs.on('data', function (data) { | ||
console.log(data) | ||
// first an array of subtitle track information is emitted | ||
parser.once('tracks', function (tracks) { | ||
console.log(tracks) | ||
}) | ||
fs.createReadStream('Sintel.2010.720p.mkv').pipe(subs) | ||
// afterwards each subtitle is emitted | ||
parser.on('subtitle', function (subtitle, trackNumber) { | ||
console.log('Track ' + trackNumber + ':', subtitle) | ||
}) | ||
fs.createReadStream('Sintel.2010.720p.mkv').pipe(parser) | ||
``` | ||
### group subtitle tracks | ||
### `tracks` event response format | ||
The following is an example of extracting subtitle tracks of an mkv: | ||
```javascript | ||
const fs = require('fs') | ||
const matroskaSubtitles = require('matroska-subtitles') | ||
var tracks = new Map() | ||
var subs = matroskaSubtitles() | ||
subs.on('data', function (data) { | ||
if (data[0] === 'new') { | ||
var key = data[1].track | ||
tracks.set(key, { | ||
language: data[1].language, | ||
subtitles: [] | ||
}) | ||
} else { | ||
var key = data[0] | ||
var subtitle = data[1] | ||
tracks.get(key).subtitles.push(subtitle) | ||
} | ||
}) | ||
subs.on('end', function () { | ||
tracks.forEach((track) => console.log(track)) | ||
}) | ||
fs.createReadStream('Sintel.2010.720p.mkv').pipe(subs) | ||
[ | ||
{ number: 3, language: 'eng', type: 'utf8' }, | ||
{ number: 4, language: 'jpn', type: 'ass', header: '[Script Info]\r\n...' } | ||
] | ||
``` | ||
> Note that this example doesn't take advantage of streaming, since the subtitles are only output once the stream ends. | ||
> Note that the `language` may be `undefined` if the mkv track doesn't specify it. | ||
### response | ||
### `subtitle` event response format | ||
The response of this example would look like this: | ||
```javascript | ||
{ language: 'eng', | ||
subtitles: | ||
[ { text: 'This blade has a dark past.', | ||
time: 107250, | ||
duration: 1970 }, | ||
{ text: 'It has shed much innocent blood.', | ||
time: 111800, | ||
duration: 4000 }, | ||
{ text: 'You\'re a fool for traveling alone,\r\nso completely unprepared.', | ||
time: 118000, | ||
duration: 3450 } ] } | ||
{ | ||
text: 'This blade has a dark past.', | ||
time: 107250, // ms | ||
duration: 1970 // ms | ||
} | ||
``` | ||
> Note that the `language` might be `undefined` if the mkv track has not specified it. | ||
> The subtitle object may also contain additional `.ass`-specific values. | ||
## contributing | ||
## random access | ||
The parser must obtain the `tracks` metadata event before it can begin to emit subtitles. | ||
To read subtitles from a specific position in the stream, | ||
you can pass in a previous instance as parameter: `parser = new MatroskaSubtitles(parser)` | ||
after the `tracks` event and pipe from a given position. See `examples/random-access.js` for an example. | ||
This is still a work in progress. | ||
If you find a bug or have suggestions feel free to create an issue or a pull request! | ||
## see also | ||
@@ -112,0 +72,0 @@ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
10428
2
8
170
77
3
1
- Removed through2@^2.0.1
- Removed core-util-is@1.0.3 (transitive)
- Removed inherits@2.0.4 (transitive)
- Removed isarray@1.0.0 (transitive)
- Removed process-nextick-args@2.0.1 (transitive)
- Removed readable-stream@2.3.8 (transitive)
- Removed safe-buffer@5.1.2 (transitive)
- Removed string_decoder@1.1.1 (transitive)
- Removed through2@2.0.5 (transitive)
- Removed util-deprecate@1.0.2 (transitive)
- Removed xtend@4.0.2 (transitive)