matroska-subtitles - npm Package Compare versions

Comparing version 1.1.2 to 2.0.0

examples/basic.js

index.js

@@ -0,130 +1,135 @@

+const Writable = require('stream').Writable
 const ebml = require('ebml')
 const ebmlBlock = require('ebml-block')
-const through = require('through2')
+const readElement = require('./lib/read-element')

 // track elements we care about
 const TRACK_ELEMENTS = ['TrackNumber', 'TrackType', 'Language', 'CodecID', 'CodecPrivate']
+const SUBTITLE_TYPES = ['S_TEXT/UTF8', 'S_TEXT/SSA', 'S_TEXT/ASS']
 const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']

-module.exports = function () {
-  const subtitleTracks = new Map()
-  const decoder = new ebml.Decoder()
-
-  var timecodeScale = 1
-  var currentTrack = null
-  var currentSubtitleBlock = null
-  var currentClusterTimecode = null
-
-  decoder.on('data', function (chunk) {
-    // Segment Information //
-    if (chunk[1].name === 'TimecodeScale') {
-      timecodeScale = readData(chunk) / 1000000
-    }
+class MatroskaSubtitles extends Writable {
+  constructor (prevInstance) {
+    super()
+
+    var currentTrack
+    var currentSubtitleBlock
+    var currentClusterTimecode
+
+    this.decoder = new ebml.Decoder()
+
+    if (prevInstance instanceof MatroskaSubtitles) {
+      prevInstance.end()
+      // copy previous metadata
+      this.subtitleTracks = prevInstance.subtitleTracks
+      this.timecodeScale = prevInstance.timecodeScale
+      this.decoder.on('data', _onClusterData)
+    } else {
+      this.subtitleTracks = new Map()
+      this.timecodeScale = 1
+      this.decoder.on('data', _onMetaData)
+    }
+
+    var self = this

-    // Clusters //
-    // TODO: assuming this is a Cluster `Timecode`
-    if (chunk[1].name === 'Timecode') {
-      currentClusterTimecode = readData(chunk)
-    }
-
-    // Tracks //
-    if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
-      currentTrack = {}
-    }
+    function _onMetaData (chunk) {
+      // Segment Information
+      if (chunk[1].name === 'TimecodeScale') {
+        self.timecodeScale = readElement(chunk[1]) / 1000000
+      }
+
+      // Tracks
+      if (chunk[0] === 'start' && chunk[1].name === 'TrackEntry') {
+        currentTrack = {}
+      }
+
+      if (currentTrack && chunk[0] === 'tag') {
+        // save info about track currently being scanned
+        if (TRACK_ELEMENTS.includes(chunk[1].name)) {
+          currentTrack[chunk[1].name] = readElement(chunk[1])
+        }
+      }

-    if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
-      // 0x11: Subtitle Track, S_TEXT/UTF8: SRT format
-      if (currentTrack.TrackType === 0x11) {
-        if (currentTrack.CodecID === 'S_TEXT/UTF8' || currentTrack.CodecID === 'S_TEXT/ASS') {
-          subtitleTracks.set(currentTrack.TrackNumber, currentTrack.CodecID)
-          var info = {
-            track: currentTrack.TrackNumber,
-            language: currentTrack.Language,
-            type: currentTrack.CodecID.substring(7)
-          }
-          if (currentTrack.CodecPrivate) {
-            // only SSA/ASS
-            info.header = currentTrack.CodecPrivate.toString('utf8')
-          }
-          stream.push(['new', info])
-        }
-      }
-      currentTrack = null
-    }
-
-    if (currentTrack && chunk[0] === 'tag') {
-      // save info about track currently being scanned
-      if (TRACK_ELEMENTS.includes(chunk[1].name)) {
-        currentTrack[chunk[1].name] = readData(chunk)
-      }
-    }
+      if (chunk[0] === 'end' && chunk[1].name === 'TrackEntry') {
+        if (currentTrack.TrackType === 0x11) { // Subtitle Track
+          if (SUBTITLE_TYPES.includes(currentTrack.CodecID)) {
+            var track = {
+              number: currentTrack.TrackNumber,
+              language: currentTrack.Language,
+              type: currentTrack.CodecID.substring(7).toLowerCase()
+            }
+            if (currentTrack.CodecPrivate) {
+              // only SSA/ASS
+              track.header = currentTrack.CodecPrivate.toString('utf8')
+            }
+            self.subtitleTracks.set(currentTrack.TrackNumber, track)
+          }
+        }
+        currentTrack = null
+      }
+
+      if (chunk[0] === 'end' && chunk[1].name === 'Tracks') {
+        self.decoder.removeListener('data', _onMetaData)
+        if (self.subtitleTracks.size <= 0) return self.end()
+        self.decoder.on('data', _onClusterData)
+        self.emit('tracks', Array.from(self.subtitleTracks.values()))
+      }
+    }

-    // Blocks //
-    if (chunk[1].name === 'Block') {
-      var block = ebmlBlock(chunk[1].data)
-
-      if (subtitleTracks.has(block.trackNumber)) {
-        var type = subtitleTracks.get(block.trackNumber)
-
-        // TODO: would a subtitle track ever use lacing? We just take the first (only) frame.
-        var subtitle = {
-          text: block.frames[0].toString('utf8'),
-          time: (block.timecode + currentClusterTimecode) * timecodeScale
-        }
-
-        if (type === 'S_TEXT/ASS') {
-          // extract ASS keys
-          var values = subtitle.text.split(',')
-          // ignore read-order
-          for (var i = 1; i < 9; i++) {
-            subtitle[ASS_KEYS[i]] = values[i]
-          }
-          // re-append extra text that might have been splitted
-          for (i = 9; i < values.length; i++) {
-            subtitle.text += ',' + values[i]
-          }
-        }
-
-        currentSubtitleBlock = [block.trackNumber, subtitle]
-      }
-    }
-
-    // TODO: assuming `BlockDuration` exists and always comes after `Block`
-    if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
-      currentSubtitleBlock[1].duration = readData(chunk) * timecodeScale
-      stream.push(currentSubtitleBlock)
-      currentSubtitleBlock = null
-    }
-  })
+    function _onClusterData (chunk) {
+      // TODO: assuming this is a Cluster `Timecode`
+      if (chunk[1].name === 'Timecode') {
+        currentClusterTimecode = readElement(chunk[1])
+      }
+
+      if (chunk[1].name === 'Block') {
+        var block = ebmlBlock(chunk[1].data)
+
+        if (self.subtitleTracks.has(block.trackNumber)) {
+          var type = self.subtitleTracks.get(block.trackNumber).type
+
+          // TODO: would a subtitle track ever use lacing? We just take the first (only) frame.
+          var subtitle = {
+            text: block.frames[0].toString('utf8'),
+            time: (block.timecode + currentClusterTimecode) * self.timecodeScale
+          }
+
+          if (type === 'ass' || type === 'ssa') {
+            // extract SSA/ASS keys
+            var values = subtitle.text.split(',')
+            // ignore read-order, and skip layer if ssa
+            var i = type === 'ssa' ? 2 : 1
+            for (; i < 9; i++) {
+              subtitle[ASS_KEYS[i]] = values[i]
+            }
+            // re-append extra text that might have been splitted
+            for (i = 9; i < values.length; i++) {
+              subtitle.text += ',' + values[i]
+            }
+          }
+
+          currentSubtitleBlock = [subtitle, block.trackNumber]
+        }
+      }
+
+      // TODO: assuming `BlockDuration` exists and always comes after `Block`
+      if (currentSubtitleBlock && chunk[1].name === 'BlockDuration') {
+        currentSubtitleBlock[0].duration = readElement(chunk[1]) * self.timecodeScale
+        self.emit('subtitle', ...currentSubtitleBlock)
+        currentSubtitleBlock = null
+      }
+    }
+  }

-  // create object stream
-  var stream = through.obj(function write (chunk, _, callback) {
-    decoder.write(chunk)
-    callback()
-  })
-
-  return stream
-}
-
-function readData (chunk) {
-  switch (chunk[1].type) {
-    case 'b':
-      return chunk[1].data
-    case 's':
-      return chunk[1].data.toString('ascii')
-    case '8':
-      return chunk[1].data.toString('utf8')
-    case 'u':
-      return chunk[1].data.readUIntBE(0, chunk[1].dataSize)
-    default:
-      console.error('Unsupported data:', chunk)
-  }
-}
+  _write (chunk, _, callback) {
+    this.decoder.write(chunk)
+    callback(null)
+  }
+}
+
+module.exports = MatroskaSubtitles
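
The new version swaps the inline `readData` helper above for a `readElement` helper imported from `lib/read-element`, whose contents are not part of this compare. Judging from the call sites (it receives `chunk[1]` directly, where `readData` received the whole chunk), it presumably decodes a single EBML element much like `readData` did. A rough sketch under that assumption:

```javascript
// lib/read-element.js — sketch only; the real file is not included in this compare.
// Assumed to mirror the old readData(), but operating on the element itself
// rather than on the [tag, element] chunk pair.
module.exports = function readElement (element) {
  switch (element.type) {
    case 'b': // binary
      return element.data
    case 's': // ASCII string
      return element.data.toString('ascii')
    case '8': // UTF-8 string
      return element.data.toString('utf8')
    case 'u': // unsigned integer
      return element.data.readUIntBE(0, element.dataSize)
    default:
      console.error('Unsupported element:', element)
  }
}
```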
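
For the SSA/ASS branch, both versions split the block text on commas and map the leading fields onto `ASS_KEYS`; 2.0.0 additionally skips the layer field for plain SSA and lower-cases the track type. A small illustration of that mapping, using a made-up block payload (the dialogue line below is hypothetical, not taken from the diff):

```javascript
// Illustration of the ASS_KEYS mapping used above; the payload is hypothetical.
const ASS_KEYS = ['readOrder', 'layer', 'style', 'name', 'marginL', 'marginR', 'marginV', 'effect', 'text']

const subtitle = { text: '57,0,Default,Sintel,0,0,0,,Hello, little one.' }
const type = 'ass' // for 'ssa' the loop starts at index 2 and the layer field is skipped

const values = subtitle.text.split(',')
var i = type === 'ssa' ? 2 : 1
for (; i < 9; i++) {
  subtitle[ASS_KEYS[i]] = values[i] // last iteration (ASS_KEYS[8] === 'text') replaces subtitle.text
}
// re-append text that itself contained commas and was split above
for (i = 9; i < values.length; i++) {
  subtitle.text += ',' + values[i]
}

console.log(subtitle.style) // 'Default'
console.log(subtitle.text)  // 'Hello, little one.'
```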
package.json

 {
   "name": "matroska-subtitles",
-  "version": "1.1.2",
-  "description": "Transform stream for parsing embedded .mkv subtitles.",
+  "version": "2.0.0",
+  "description": "Writable stream for parsing embedded .mkv subtitles.",
   "main": "index.js",
   "dependencies": {
     "ebml": "^2.2.0",
-    "ebml-block": "^1.0.0",
-    "through2": "^2.0.1"
+    "ebml-block": "^1.0.0"
   },

@@ -32,3 +31,6 @@ "devDependencies": {},

   },
-  "homepage": "https://github.com/mathiasvr/matroska-subtitles#readme"
+  "homepage": "https://github.com/mathiasvr/matroska-subtitles#readme",
+  "directories": {
+    "example": "examples"
+  }
 }

README.md

@@ -10,102 +10,62 @@ # matroska-subtitles [![npm][npm-img]][npm-url] [![dependencies][dep-img]][dep-url] [![license][lic-img]][lic-url]

-Transform stream for parsing embedded .mkv subtitles.
+Writable stream for parsing embedded .mkv subtitles.

-> Currently supports extraction of the .srt and .ass format.
+Supported formats: `.srt`, `.ssa`, `.ass`.

 ## install

-```
+```bash
 npm install matroska-subtitles
 ```

-## documentation
+## example

-The `data` event of the stream will emit an array that determines the type of the data.
-
-When a new subtitle track is encountered the *track number*, *language*, *type* and optionally a *header* is emitted:
-
-```
-data = [ 'new', { track: <track number>, language: <string>, type: <string>, header: <string> } ]
-```
-
-Subsequently a specific subtitle track will emit data of this form:
-
-```
-data = [ <track number>, { text: <string>, time: <ms>, duration: <ms> } ]
-```
-
-## examples
-
-### dump all subtitles
-
 ```javascript
 const fs = require('fs')
-const matroskaSubtitles = require('matroska-subtitles')
+const MatroskaSubtitles = require('matroska-subtitles')

-var subs = matroskaSubtitles()
+var parser = new MatroskaSubtitles()

-subs.on('data', function (data) {
-  console.log(data)
+// first an array of subtitle track information is emitted
+parser.once('tracks', function (tracks) {
+  console.log(tracks)
 })

-fs.createReadStream('Sintel.2010.720p.mkv').pipe(subs)
+// afterwards each subtitle is emitted
+parser.on('subtitle', function (subtitle, trackNumber) {
+  console.log('Track ' + trackNumber + ':', subtitle)
+})
+
+fs.createReadStream('Sintel.2010.720p.mkv').pipe(parser)
 ```

-### group subtitle tracks
+### `tracks` event response format

 The following is an example of extracting subtitle tracks of an mkv:

 ```javascript
-const fs = require('fs')
-const matroskaSubtitles = require('matroska-subtitles')
-
-var tracks = new Map()
-var subs = matroskaSubtitles()
-
-subs.on('data', function (data) {
-  if (data[0] === 'new') {
-    var key = data[1].track
-    tracks.set(key, {
-      language: data[1].language,
-      subtitles: []
-    })
-  } else {
-    var key = data[0]
-    var subtitle = data[1]
-    tracks.get(key).subtitles.push(subtitle)
-  }
-})
-
-subs.on('end', function () {
-  tracks.forEach((track) => console.log(track))
-})
-
-fs.createReadStream('Sintel.2010.720p.mkv').pipe(subs)
+[
+  { number: 3, language: 'eng', type: 'utf8' },
+  { number: 4, language: 'jpn', type: 'ass', header: '[Script Info]\r\n...' }
+]
 ```

-> Notice that this example doesn't take advantage of streaming since the subtitles first are being outputted when the stream ends.
+> Note that the `language` may be `undefined` if the mkv track doesn't specify it.

-### response
+### `subtitle` event response format

-The response of this example would look like this:
-
 ```javascript
-{ language: 'eng',
-  subtitles:
-   [ { text: 'This blade has a dark past.',
-       time: 107250,
-       duration: 1970 },
-     { text: 'It has shed much innocent blood.',
-       time: 111800,
-       duration: 4000 },
-     { text: 'You\'re a fool for traveling alone,\r\nso completely unprepared.',
-       time: 118000,
-       duration: 3450 } ] }
+{
+  text: 'This blade has a dark past.',
+  time: 107250, // ms
+  duration: 1970 // ms
+}
 ```

-> Note that the `language` might be `undefined` if the mkv track has not specified it.
+> May also contain additional `.ass` specific values

-## contributing
+## random access
+
+The parser must obtain the `tracks` metadata event before it can begin to emit subtitles.
+
+To read subtitles from a specific position in the stream,
+you can pass in a previous instance as parameter: `parser = new MatroskaSubtitles(parser)`
+after the `tracks` event and pipe from a given position. See `examples/random-access.js` for an example.

 This is still a work in progress.

 If you find a bug or have suggestions feel free to create an issue or a pull request!

 ## see also

@@ -112,0 +72,0 @@
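
The `random access` section added to the README only describes the workflow. Below is a rough illustration of what it could look like in practice; the file name, byte offset, and the first-pass stream handling are illustrative assumptions, not taken from `examples/random-access.js`:

```javascript
const fs = require('fs')
const MatroskaSubtitles = require('matroska-subtitles')

const file = 'Sintel.2010.720p.mkv'  // illustrative file name
const clusterOffset = 123456789      // hypothetical byte offset of a later Cluster element

const parser = new MatroskaSubtitles()
const metadataStream = fs.createReadStream(file)

parser.once('tracks', function (tracks) {
  // stop the first pass; the new parser below reuses its track metadata
  metadataStream.unpipe(parser)
  metadataStream.destroy()

  // copies subtitleTracks and timecodeScale, and ends `parser`
  const seekParser = new MatroskaSubtitles(parser)

  seekParser.on('subtitle', function (subtitle, trackNumber) {
    console.log('Track ' + trackNumber + ':', subtitle)
  })

  // second pass: pipe from a later position in the file
  fs.createReadStream(file, { start: clusterOffset }).pipe(seekParser)
})

// first pass: parse from the beginning until the tracks metadata has been emitted
metadataStream.pipe(parser)
```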
