youtube-transcript
Advanced tools
Comparing version 1.1.0 to 1.2.0
export declare class YoutubeTranscriptError extends Error { | ||
constructor(message: any); | ||
} | ||
export declare class YoutubeTranscriptTooManyRequestError extends YoutubeTranscriptError { | ||
constructor(); | ||
} | ||
export declare class YoutubeTranscriptVideoUnavailableError extends YoutubeTranscriptError { | ||
constructor(videoId: string); | ||
} | ||
export declare class YoutubeTranscriptDisabledError extends YoutubeTranscriptError { | ||
constructor(videoId: string); | ||
} | ||
export declare class YoutubeTranscriptNotAvailableError extends YoutubeTranscriptError { | ||
constructor(videoId: string); | ||
} | ||
export declare class YoutubeTranscriptNotAvailableLanguageError extends YoutubeTranscriptError { | ||
constructor(lang: string, availableLangs: string[], videoId: string); | ||
} | ||
export interface TranscriptConfig { | ||
lang?: string; | ||
country?: string; | ||
} | ||
@@ -12,2 +26,3 @@ export interface TranscriptResponse { | ||
offset: number; | ||
lang?: string; | ||
} | ||
@@ -21,16 +36,6 @@ /** | ||
* @param videoId Video url or video identifier | ||
* @param config Get transcript in another country and language ISO | ||
* @param config Get transcript in a specific language ISO | ||
*/ | ||
static fetchTranscript(videoId: string, config?: TranscriptConfig): Promise<TranscriptResponse[]>; | ||
/** | ||
* Generate tracking params for YTB API | ||
* @param page | ||
* @param config | ||
*/ | ||
private static generateRequest; | ||
/** | ||
* 'base.js' function | ||
*/ | ||
private static generateNonce; | ||
/** | ||
* Retrieve video id from url or string | ||
@@ -37,0 +42,0 @@ * @param videoId video url or video id |
@@ -15,17 +15,3 @@ 'use strict';Object.defineProperty(exports,'__esModule',{value:true});/*! ***************************************************************************** | ||
***************************************************************************** */ | ||
/* global Reflect, Promise */ | ||
var extendStatics = function(d, b) { | ||
extendStatics = Object.setPrototypeOf || | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
}; | ||
function __extends(d, b) { | ||
extendStatics(d, b); | ||
function __() { this.constructor = d; } | ||
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); | ||
} | ||
function __awaiter(thisArg, _arguments, P, generator) { | ||
@@ -39,209 +25,101 @@ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } | ||
}); | ||
}const RE_YOUTUBE = /(?:youtube\.com\/(?:[^\/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?\/\s]{11})/i; | ||
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36,gzip(gfe)'; | ||
const RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g; | ||
class YoutubeTranscriptError extends Error { | ||
constructor(message) { | ||
super(`[YoutubeTranscript] 🚨 ${message}`); | ||
} | ||
} | ||
function __generator(thisArg, body) { | ||
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; | ||
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; | ||
function verb(n) { return function (v) { return step([n, v]); }; } | ||
function step(op) { | ||
if (f) throw new TypeError("Generator is already executing."); | ||
while (_) try { | ||
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; | ||
if (y = 0, t) op = [op[0] & 2, t.value]; | ||
switch (op[0]) { | ||
case 0: case 1: t = op; break; | ||
case 4: _.label++; return { value: op[1], done: false }; | ||
case 5: _.label++; y = op[1]; op = [0]; continue; | ||
case 7: op = _.ops.pop(); _.trys.pop(); continue; | ||
default: | ||
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } | ||
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } | ||
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } | ||
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } | ||
if (t[2]) _.ops.pop(); | ||
_.trys.pop(); continue; | ||
} | ||
op = body.call(thisArg, _); | ||
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } | ||
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; | ||
class YoutubeTranscriptTooManyRequestError extends YoutubeTranscriptError { | ||
constructor() { | ||
super('YouTube is receiving too many requests from this IP and now requires solving a captcha to continue'); | ||
} | ||
}var RE_YOUTUBE = /(?:youtube\.com\/(?:[^\/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?\/\s]{11})/i; | ||
var USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36,gzip(gfe)'; | ||
var YoutubeTranscriptError = /** @class */ (function (_super) { | ||
__extends(YoutubeTranscriptError, _super); | ||
function YoutubeTranscriptError(message) { | ||
return _super.call(this, "[YoutubeTranscript] \uD83D\uDEA8 " + message) || this; | ||
} | ||
class YoutubeTranscriptVideoUnavailableError extends YoutubeTranscriptError { | ||
constructor(videoId) { | ||
super(`The video is no longer available (${videoId})`); | ||
} | ||
return YoutubeTranscriptError; | ||
}(Error)); | ||
} | ||
class YoutubeTranscriptDisabledError extends YoutubeTranscriptError { | ||
constructor(videoId) { | ||
super(`Transcript is disabled on this video (${videoId})`); | ||
} | ||
} | ||
class YoutubeTranscriptNotAvailableError extends YoutubeTranscriptError { | ||
constructor(videoId) { | ||
super(`No transcripts are available for this video (${videoId})`); | ||
} | ||
} | ||
class YoutubeTranscriptNotAvailableLanguageError extends YoutubeTranscriptError { | ||
constructor(lang, availableLangs, videoId) { | ||
super(`No transcripts are available in ${lang} this video (${videoId}). Available languages: ${availableLangs.join(', ')}`); | ||
} | ||
} | ||
/** | ||
* Class to retrieve transcript if exist | ||
*/ | ||
var YoutubeTranscript = /** @class */ (function () { | ||
function YoutubeTranscript() { | ||
} | ||
class YoutubeTranscript { | ||
/** | ||
* Fetch transcript from YTB Video | ||
* @param videoId Video url or video identifier | ||
* @param config Get transcript in another country and language ISO | ||
* @param config Get transcript in a specific language ISO | ||
*/ | ||
YoutubeTranscript.fetchTranscript = function (videoId, config) { | ||
return __awaiter(this, void 0, void 0, function () { | ||
var identifier, videoPageResponse, videoPageBody, innerTubeApiKey, transcriptResponse, body, transcripts, e_1; | ||
return __generator(this, function (_a) { | ||
switch (_a.label) { | ||
case 0: | ||
identifier = this.retrieveVideoId(videoId); | ||
_a.label = 1; | ||
case 1: | ||
_a.trys.push([1, 7, , 8]); | ||
return [4 /*yield*/, fetch("https://www.youtube.com/watch?v=" + identifier, { | ||
headers: { | ||
'User-Agent': USER_AGENT, | ||
}, | ||
})]; | ||
case 2: | ||
videoPageResponse = _a.sent(); | ||
return [4 /*yield*/, videoPageResponse.text()]; | ||
case 3: | ||
videoPageBody = _a.sent(); | ||
innerTubeApiKey = videoPageBody | ||
.split('"INNERTUBE_API_KEY":"')[1] | ||
.split('"')[0]; | ||
if (!(innerTubeApiKey && innerTubeApiKey.length > 0)) return [3 /*break*/, 6]; | ||
return [4 /*yield*/, fetch("https://www.youtube.com/youtubei/v1/get_transcript?key=" + innerTubeApiKey, { | ||
method: 'POST', | ||
body: JSON.stringify(this.generateRequest(videoPageBody, config)), | ||
headers: { | ||
'Content-Type': 'application/json', | ||
'User-Agent': USER_AGENT, | ||
}, | ||
})]; | ||
case 4: | ||
transcriptResponse = _a.sent(); | ||
return [4 /*yield*/, transcriptResponse.json()]; | ||
case 5: | ||
body = _a.sent(); | ||
if (body.responseContext) { | ||
if (!body.actions) { | ||
throw new Error('Transcript is disabled on this video'); | ||
} | ||
transcripts = body.actions[0].updateEngagementPanelAction.content | ||
.transcriptRenderer.body.transcriptBodyRenderer.cueGroups; | ||
return [2 /*return*/, transcripts.map(function (cue) { return ({ | ||
text: cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer | ||
.cue.simpleText, | ||
duration: parseInt(cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer | ||
.durationMs), | ||
offset: parseInt(cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer | ||
.startOffsetMs), | ||
}); })]; | ||
} | ||
_a.label = 6; | ||
case 6: return [3 /*break*/, 8]; | ||
case 7: | ||
e_1 = _a.sent(); | ||
throw new YoutubeTranscriptError(e_1); | ||
case 8: return [2 /*return*/]; | ||
static fetchTranscript(videoId, config) { | ||
var _a; | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const identifier = this.retrieveVideoId(videoId); | ||
const videoPageResponse = yield fetch(`https://www.youtube.com/watch?v=${identifier}`, { | ||
headers: Object.assign(Object.assign({}, ((config === null || config === void 0 ? void 0 : config.lang) && { 'Accept-Language': config.lang })), { 'User-Agent': USER_AGENT }), | ||
}); | ||
const videoPageBody = yield videoPageResponse.text(); | ||
const splittedHTML = videoPageBody.split('"captions":'); | ||
if (splittedHTML.length <= 1) { | ||
if (videoPageBody.includes('class="g-recaptcha"')) { | ||
throw new YoutubeTranscriptTooManyRequestError(); | ||
} | ||
}); | ||
}); | ||
}; | ||
/** | ||
* Generate tracking params for YTB API | ||
* @param page | ||
* @param config | ||
*/ | ||
YoutubeTranscript.generateRequest = function (page, config) { | ||
var _a, _b, _c, _d; | ||
var params = (_a = page.split('"serializedShareEntity":"')[1]) === null || _a === void 0 ? void 0 : _a.split('"')[0]; | ||
var visitorData = (_b = page.split('"VISITOR_DATA":"')[1]) === null || _b === void 0 ? void 0 : _b.split('"')[0]; | ||
var sessionId = (_c = page.split('"sessionId":"')[1]) === null || _c === void 0 ? void 0 : _c.split('"')[0]; | ||
var clickTrackingParams = (_d = page === null || page === void 0 ? void 0 : page.split('"clickTrackingParams":"')[1]) === null || _d === void 0 ? void 0 : _d.split('"')[0]; | ||
return { | ||
context: { | ||
client: { | ||
hl: (config === null || config === void 0 ? void 0 : config.lang) || 'en', | ||
gl: (config === null || config === void 0 ? void 0 : config.country) || 'US', | ||
visitorData: visitorData, | ||
userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36,gzip(gfe)', | ||
clientName: 'WEB', | ||
clientVersion: '2.20200925.01.00', | ||
osName: 'Macintosh', | ||
osVersion: '10_15_4', | ||
browserName: 'Chrome', | ||
browserVersion: '85.0f.4183.83', | ||
screenWidthPoints: 1440, | ||
screenHeightPoints: 770, | ||
screenPixelDensity: 2, | ||
utcOffsetMinutes: 120, | ||
userInterfaceTheme: 'USER_INTERFACE_THEME_LIGHT', | ||
connectionType: 'CONN_CELLULAR_3G', | ||
}, | ||
request: { | ||
sessionId: sessionId, | ||
internalExperimentFlags: [], | ||
consistencyTokenJars: [], | ||
}, | ||
user: {}, | ||
clientScreenNonce: this.generateNonce(), | ||
clickTracking: { | ||
clickTrackingParams: clickTrackingParams, | ||
}, | ||
}, | ||
params: params, | ||
}; | ||
}; | ||
/** | ||
* 'base.js' function | ||
*/ | ||
YoutubeTranscript.generateNonce = function () { | ||
var rnd = Math.random().toString(); | ||
var alphabet = 'ABCDEFGHIJKLMOPQRSTUVWXYZabcdefghjijklmnopqrstuvwxyz0123456789'; | ||
var jda = [ | ||
alphabet + '+/=', | ||
alphabet + '+/', | ||
alphabet + '-_=', | ||
alphabet + '-_.', | ||
alphabet + '-_', | ||
]; | ||
var b = jda[3]; | ||
var a = []; | ||
for (var i = 0; i < rnd.length - 1; i++) { | ||
a.push(rnd[i].charCodeAt(i)); | ||
} | ||
var c = ''; | ||
var d = 0; | ||
var m, n, q, r, f, g; | ||
while (d < a.length) { | ||
f = a[d]; | ||
g = d + 1 < a.length; | ||
if (g) { | ||
m = a[d + 1]; | ||
if (!videoPageBody.includes('"playabilityStatus":')) { | ||
throw new YoutubeTranscriptVideoUnavailableError(videoId); | ||
} | ||
throw new YoutubeTranscriptDisabledError(videoId); | ||
} | ||
else { | ||
m = 0; | ||
const captions = (_a = (() => { | ||
try { | ||
return JSON.parse(splittedHTML[1].split(',"videoDetails')[0].replace('\n', '')); | ||
} | ||
catch (e) { | ||
return undefined; | ||
} | ||
})()) === null || _a === void 0 ? void 0 : _a['playerCaptionsTracklistRenderer']; | ||
if (!captions) { | ||
throw new YoutubeTranscriptDisabledError(videoId); | ||
} | ||
n = d + 2 < a.length; | ||
if (n) { | ||
q = a[d + 2]; | ||
if (!('captionTracks' in captions)) { | ||
throw new YoutubeTranscriptNotAvailableError(videoId); | ||
} | ||
else { | ||
q = 0; | ||
if ((config === null || config === void 0 ? void 0 : config.lang) && | ||
!captions.captionTracks.some((track) => track.languageCode === (config === null || config === void 0 ? void 0 : config.lang))) { | ||
throw new YoutubeTranscriptNotAvailableLanguageError(config === null || config === void 0 ? void 0 : config.lang, captions.captionTracks.map((track) => track.languageCode), videoId); | ||
} | ||
r = f >> 2; | ||
f = ((f & 3) << 4) | (m >> 4); | ||
m = ((m & 15) << 2) | (q >> 6); | ||
q &= 63; | ||
if (!n) { | ||
q = 64; | ||
if (!q) { | ||
m = 64; | ||
} | ||
const transcriptURL = ((config === null || config === void 0 ? void 0 : config.lang) ? captions.captionTracks.find((track) => track.languageCode === config.lang) | ||
: captions.captionTracks[0]).baseUrl; | ||
const transcriptResponse = yield fetch(transcriptURL, { | ||
headers: Object.assign(Object.assign({}, ((config === null || config === void 0 ? void 0 : config.lang) && { 'Accept-Language': config.lang })), { 'User-Agent': USER_AGENT }), | ||
}); | ||
if (!transcriptResponse.ok) { | ||
throw new YoutubeTranscriptNotAvailableError(videoId); | ||
} | ||
c += b[r] + b[f] + b[m] + b[q]; | ||
d += 3; | ||
} | ||
return c; | ||
}; | ||
const transcriptBody = yield transcriptResponse.text(); | ||
const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)]; | ||
return results.map((result) => { | ||
var _a; | ||
return ({ | ||
text: result[3], | ||
duration: parseFloat(result[2]), | ||
offset: parseFloat(result[1]), | ||
lang: (_a = config === null || config === void 0 ? void 0 : config.lang) !== null && _a !== void 0 ? _a : captions.captionTracks[0].languageCode, | ||
}); | ||
}); | ||
}); | ||
} | ||
/** | ||
@@ -251,7 +129,7 @@ * Retrieve video id from url or string | ||
*/ | ||
YoutubeTranscript.retrieveVideoId = function (videoId) { | ||
static retrieveVideoId(videoId) { | ||
if (videoId.length === 11) { | ||
return videoId; | ||
} | ||
var matchId = videoId.match(RE_YOUTUBE); | ||
const matchId = videoId.match(RE_YOUTUBE); | ||
if (matchId && matchId.length) { | ||
@@ -261,4 +139,3 @@ return matchId[1]; | ||
throw new YoutubeTranscriptError('Impossible to retrieve Youtube video ID.'); | ||
}; | ||
return YoutubeTranscript; | ||
}());exports.YoutubeTranscript=YoutubeTranscript;exports.YoutubeTranscriptError=YoutubeTranscriptError; | ||
} | ||
}exports.YoutubeTranscript=YoutubeTranscript;exports.YoutubeTranscriptDisabledError=YoutubeTranscriptDisabledError;exports.YoutubeTranscriptError=YoutubeTranscriptError;exports.YoutubeTranscriptNotAvailableError=YoutubeTranscriptNotAvailableError;exports.YoutubeTranscriptNotAvailableLanguageError=YoutubeTranscriptNotAvailableLanguageError;exports.YoutubeTranscriptTooManyRequestError=YoutubeTranscriptTooManyRequestError;exports.YoutubeTranscriptVideoUnavailableError=YoutubeTranscriptVideoUnavailableError; |
@@ -15,17 +15,3 @@ /*! ***************************************************************************** | ||
***************************************************************************** */ | ||
/* global Reflect, Promise */ | ||
var extendStatics = function(d, b) { | ||
extendStatics = Object.setPrototypeOf || | ||
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || | ||
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; | ||
return extendStatics(d, b); | ||
}; | ||
function __extends(d, b) { | ||
extendStatics(d, b); | ||
function __() { this.constructor = d; } | ||
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); | ||
} | ||
function __awaiter(thisArg, _arguments, P, generator) { | ||
@@ -41,209 +27,101 @@ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } | ||
function __generator(thisArg, body) { | ||
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; | ||
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; | ||
function verb(n) { return function (v) { return step([n, v]); }; } | ||
function step(op) { | ||
if (f) throw new TypeError("Generator is already executing."); | ||
while (_) try { | ||
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; | ||
if (y = 0, t) op = [op[0] & 2, t.value]; | ||
switch (op[0]) { | ||
case 0: case 1: t = op; break; | ||
case 4: _.label++; return { value: op[1], done: false }; | ||
case 5: _.label++; y = op[1]; op = [0]; continue; | ||
case 7: op = _.ops.pop(); _.trys.pop(); continue; | ||
default: | ||
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } | ||
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } | ||
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } | ||
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } | ||
if (t[2]) _.ops.pop(); | ||
_.trys.pop(); continue; | ||
} | ||
op = body.call(thisArg, _); | ||
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } | ||
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; | ||
const RE_YOUTUBE = /(?:youtube\.com\/(?:[^\/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?\/\s]{11})/i; | ||
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36,gzip(gfe)'; | ||
const RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g; | ||
class YoutubeTranscriptError extends Error { | ||
constructor(message) { | ||
super(`[YoutubeTranscript] 🚨 ${message}`); | ||
} | ||
} | ||
var RE_YOUTUBE = /(?:youtube\.com\/(?:[^\/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?\/\s]{11})/i; | ||
var USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36,gzip(gfe)'; | ||
var YoutubeTranscriptError = /** @class */ (function (_super) { | ||
__extends(YoutubeTranscriptError, _super); | ||
function YoutubeTranscriptError(message) { | ||
return _super.call(this, "[YoutubeTranscript] \uD83D\uDEA8 " + message) || this; | ||
class YoutubeTranscriptTooManyRequestError extends YoutubeTranscriptError { | ||
constructor() { | ||
super('YouTube is receiving too many requests from this IP and now requires solving a captcha to continue'); | ||
} | ||
return YoutubeTranscriptError; | ||
}(Error)); | ||
} | ||
class YoutubeTranscriptVideoUnavailableError extends YoutubeTranscriptError { | ||
constructor(videoId) { | ||
super(`The video is no longer available (${videoId})`); | ||
} | ||
} | ||
class YoutubeTranscriptDisabledError extends YoutubeTranscriptError { | ||
constructor(videoId) { | ||
super(`Transcript is disabled on this video (${videoId})`); | ||
} | ||
} | ||
class YoutubeTranscriptNotAvailableError extends YoutubeTranscriptError { | ||
constructor(videoId) { | ||
super(`No transcripts are available for this video (${videoId})`); | ||
} | ||
} | ||
class YoutubeTranscriptNotAvailableLanguageError extends YoutubeTranscriptError { | ||
constructor(lang, availableLangs, videoId) { | ||
super(`No transcripts are available in ${lang} this video (${videoId}). Available languages: ${availableLangs.join(', ')}`); | ||
} | ||
} | ||
/** | ||
* Class to retrieve transcript if exist | ||
*/ | ||
var YoutubeTranscript = /** @class */ (function () { | ||
function YoutubeTranscript() { | ||
} | ||
class YoutubeTranscript { | ||
/** | ||
* Fetch transcript from YTB Video | ||
* @param videoId Video url or video identifier | ||
* @param config Get transcript in another country and language ISO | ||
* @param config Get transcript in a specific language ISO | ||
*/ | ||
YoutubeTranscript.fetchTranscript = function (videoId, config) { | ||
return __awaiter(this, void 0, void 0, function () { | ||
var identifier, videoPageResponse, videoPageBody, innerTubeApiKey, transcriptResponse, body, transcripts, e_1; | ||
return __generator(this, function (_a) { | ||
switch (_a.label) { | ||
case 0: | ||
identifier = this.retrieveVideoId(videoId); | ||
_a.label = 1; | ||
case 1: | ||
_a.trys.push([1, 7, , 8]); | ||
return [4 /*yield*/, fetch("https://www.youtube.com/watch?v=" + identifier, { | ||
headers: { | ||
'User-Agent': USER_AGENT, | ||
}, | ||
})]; | ||
case 2: | ||
videoPageResponse = _a.sent(); | ||
return [4 /*yield*/, videoPageResponse.text()]; | ||
case 3: | ||
videoPageBody = _a.sent(); | ||
innerTubeApiKey = videoPageBody | ||
.split('"INNERTUBE_API_KEY":"')[1] | ||
.split('"')[0]; | ||
if (!(innerTubeApiKey && innerTubeApiKey.length > 0)) return [3 /*break*/, 6]; | ||
return [4 /*yield*/, fetch("https://www.youtube.com/youtubei/v1/get_transcript?key=" + innerTubeApiKey, { | ||
method: 'POST', | ||
body: JSON.stringify(this.generateRequest(videoPageBody, config)), | ||
headers: { | ||
'Content-Type': 'application/json', | ||
'User-Agent': USER_AGENT, | ||
}, | ||
})]; | ||
case 4: | ||
transcriptResponse = _a.sent(); | ||
return [4 /*yield*/, transcriptResponse.json()]; | ||
case 5: | ||
body = _a.sent(); | ||
if (body.responseContext) { | ||
if (!body.actions) { | ||
throw new Error('Transcript is disabled on this video'); | ||
} | ||
transcripts = body.actions[0].updateEngagementPanelAction.content | ||
.transcriptRenderer.body.transcriptBodyRenderer.cueGroups; | ||
return [2 /*return*/, transcripts.map(function (cue) { return ({ | ||
text: cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer | ||
.cue.simpleText, | ||
duration: parseInt(cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer | ||
.durationMs), | ||
offset: parseInt(cue.transcriptCueGroupRenderer.cues[0].transcriptCueRenderer | ||
.startOffsetMs), | ||
}); })]; | ||
} | ||
_a.label = 6; | ||
case 6: return [3 /*break*/, 8]; | ||
case 7: | ||
e_1 = _a.sent(); | ||
throw new YoutubeTranscriptError(e_1); | ||
case 8: return [2 /*return*/]; | ||
static fetchTranscript(videoId, config) { | ||
var _a; | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const identifier = this.retrieveVideoId(videoId); | ||
const videoPageResponse = yield fetch(`https://www.youtube.com/watch?v=${identifier}`, { | ||
headers: Object.assign(Object.assign({}, ((config === null || config === void 0 ? void 0 : config.lang) && { 'Accept-Language': config.lang })), { 'User-Agent': USER_AGENT }), | ||
}); | ||
const videoPageBody = yield videoPageResponse.text(); | ||
const splittedHTML = videoPageBody.split('"captions":'); | ||
if (splittedHTML.length <= 1) { | ||
if (videoPageBody.includes('class="g-recaptcha"')) { | ||
throw new YoutubeTranscriptTooManyRequestError(); | ||
} | ||
}); | ||
}); | ||
}; | ||
/** | ||
* Generate tracking params for YTB API | ||
* @param page | ||
* @param config | ||
*/ | ||
YoutubeTranscript.generateRequest = function (page, config) { | ||
var _a, _b, _c, _d; | ||
var params = (_a = page.split('"serializedShareEntity":"')[1]) === null || _a === void 0 ? void 0 : _a.split('"')[0]; | ||
var visitorData = (_b = page.split('"VISITOR_DATA":"')[1]) === null || _b === void 0 ? void 0 : _b.split('"')[0]; | ||
var sessionId = (_c = page.split('"sessionId":"')[1]) === null || _c === void 0 ? void 0 : _c.split('"')[0]; | ||
var clickTrackingParams = (_d = page === null || page === void 0 ? void 0 : page.split('"clickTrackingParams":"')[1]) === null || _d === void 0 ? void 0 : _d.split('"')[0]; | ||
return { | ||
context: { | ||
client: { | ||
hl: (config === null || config === void 0 ? void 0 : config.lang) || 'en', | ||
gl: (config === null || config === void 0 ? void 0 : config.country) || 'US', | ||
visitorData: visitorData, | ||
userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36,gzip(gfe)', | ||
clientName: 'WEB', | ||
clientVersion: '2.20200925.01.00', | ||
osName: 'Macintosh', | ||
osVersion: '10_15_4', | ||
browserName: 'Chrome', | ||
browserVersion: '85.0f.4183.83', | ||
screenWidthPoints: 1440, | ||
screenHeightPoints: 770, | ||
screenPixelDensity: 2, | ||
utcOffsetMinutes: 120, | ||
userInterfaceTheme: 'USER_INTERFACE_THEME_LIGHT', | ||
connectionType: 'CONN_CELLULAR_3G', | ||
}, | ||
request: { | ||
sessionId: sessionId, | ||
internalExperimentFlags: [], | ||
consistencyTokenJars: [], | ||
}, | ||
user: {}, | ||
clientScreenNonce: this.generateNonce(), | ||
clickTracking: { | ||
clickTrackingParams: clickTrackingParams, | ||
}, | ||
}, | ||
params: params, | ||
}; | ||
}; | ||
/** | ||
* 'base.js' function | ||
*/ | ||
YoutubeTranscript.generateNonce = function () { | ||
var rnd = Math.random().toString(); | ||
var alphabet = 'ABCDEFGHIJKLMOPQRSTUVWXYZabcdefghjijklmnopqrstuvwxyz0123456789'; | ||
var jda = [ | ||
alphabet + '+/=', | ||
alphabet + '+/', | ||
alphabet + '-_=', | ||
alphabet + '-_.', | ||
alphabet + '-_', | ||
]; | ||
var b = jda[3]; | ||
var a = []; | ||
for (var i = 0; i < rnd.length - 1; i++) { | ||
a.push(rnd[i].charCodeAt(i)); | ||
} | ||
var c = ''; | ||
var d = 0; | ||
var m, n, q, r, f, g; | ||
while (d < a.length) { | ||
f = a[d]; | ||
g = d + 1 < a.length; | ||
if (g) { | ||
m = a[d + 1]; | ||
if (!videoPageBody.includes('"playabilityStatus":')) { | ||
throw new YoutubeTranscriptVideoUnavailableError(videoId); | ||
} | ||
throw new YoutubeTranscriptDisabledError(videoId); | ||
} | ||
else { | ||
m = 0; | ||
const captions = (_a = (() => { | ||
try { | ||
return JSON.parse(splittedHTML[1].split(',"videoDetails')[0].replace('\n', '')); | ||
} | ||
catch (e) { | ||
return undefined; | ||
} | ||
})()) === null || _a === void 0 ? void 0 : _a['playerCaptionsTracklistRenderer']; | ||
if (!captions) { | ||
throw new YoutubeTranscriptDisabledError(videoId); | ||
} | ||
n = d + 2 < a.length; | ||
if (n) { | ||
q = a[d + 2]; | ||
if (!('captionTracks' in captions)) { | ||
throw new YoutubeTranscriptNotAvailableError(videoId); | ||
} | ||
else { | ||
q = 0; | ||
if ((config === null || config === void 0 ? void 0 : config.lang) && | ||
!captions.captionTracks.some((track) => track.languageCode === (config === null || config === void 0 ? void 0 : config.lang))) { | ||
throw new YoutubeTranscriptNotAvailableLanguageError(config === null || config === void 0 ? void 0 : config.lang, captions.captionTracks.map((track) => track.languageCode), videoId); | ||
} | ||
r = f >> 2; | ||
f = ((f & 3) << 4) | (m >> 4); | ||
m = ((m & 15) << 2) | (q >> 6); | ||
q &= 63; | ||
if (!n) { | ||
q = 64; | ||
if (!q) { | ||
m = 64; | ||
} | ||
const transcriptURL = ((config === null || config === void 0 ? void 0 : config.lang) ? captions.captionTracks.find((track) => track.languageCode === config.lang) | ||
: captions.captionTracks[0]).baseUrl; | ||
const transcriptResponse = yield fetch(transcriptURL, { | ||
headers: Object.assign(Object.assign({}, ((config === null || config === void 0 ? void 0 : config.lang) && { 'Accept-Language': config.lang })), { 'User-Agent': USER_AGENT }), | ||
}); | ||
if (!transcriptResponse.ok) { | ||
throw new YoutubeTranscriptNotAvailableError(videoId); | ||
} | ||
c += b[r] + b[f] + b[m] + b[q]; | ||
d += 3; | ||
} | ||
return c; | ||
}; | ||
const transcriptBody = yield transcriptResponse.text(); | ||
const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)]; | ||
return results.map((result) => { | ||
var _a; | ||
return ({ | ||
text: result[3], | ||
duration: parseFloat(result[2]), | ||
offset: parseFloat(result[1]), | ||
lang: (_a = config === null || config === void 0 ? void 0 : config.lang) !== null && _a !== void 0 ? _a : captions.captionTracks[0].languageCode, | ||
}); | ||
}); | ||
}); | ||
} | ||
/** | ||
@@ -253,7 +131,7 @@ * Retrieve video id from url or string | ||
*/ | ||
YoutubeTranscript.retrieveVideoId = function (videoId) { | ||
static retrieveVideoId(videoId) { | ||
if (videoId.length === 11) { | ||
return videoId; | ||
} | ||
var matchId = videoId.match(RE_YOUTUBE); | ||
const matchId = videoId.match(RE_YOUTUBE); | ||
if (matchId && matchId.length) { | ||
@@ -263,6 +141,5 @@ return matchId[1]; | ||
throw new YoutubeTranscriptError('Impossible to retrieve Youtube video ID.'); | ||
}; | ||
return YoutubeTranscript; | ||
}()); | ||
} | ||
} | ||
export { YoutubeTranscript, YoutubeTranscriptError }; | ||
export { YoutubeTranscript, YoutubeTranscriptDisabledError, YoutubeTranscriptError, YoutubeTranscriptNotAvailableError, YoutubeTranscriptNotAvailableLanguageError, YoutubeTranscriptTooManyRequestError, YoutubeTranscriptVideoUnavailableError }; |
{ | ||
"name": "youtube-transcript", | ||
"version": "1.1.0", | ||
"version": "1.2.0", | ||
"description": "Fetch transcript from a youtube video", | ||
@@ -5,0 +5,0 @@ "main": "dist/youtube-transcript.common.js", |
Deprecated
Maintenance: The maintainer of the package marked it as deprecated. This could indicate that a single version should not be used, or that the package is no longer maintained and any new vulnerabilities will not be fixed.
Found 1 instance in 1 package
Major refactor
Supply chain risk: The package has recently undergone a major refactor. It may be unstable, or the refactor may indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
18229
312
1
5