youtube-transcript-js-api
Advanced tools
Comparing version
@@ -136,3 +136,49 @@ #!/usr/bin/env node | ||
}); | ||
// `debug <video>`: runs the library's extraction diagnostics for one video and
// prints a human-readable report. The progress notice goes to stderr and the
// report itself to stdout; any failure is reported on stderr with exit code 1.
commander_1.program
    .command('debug <video>')
    .description('Debug transcript extraction for a YouTube video (helps diagnose why extraction might fail)')
    .action(async (video) => {
        // Renders a boolean as the check/cross glyph used throughout the report.
        const glyph = (flag) => (flag ? '✓' : '✗');
        try {
            console.error('Running debug analysis...');
            const transcriptApi = new index_1.YouTubeTranscriptApi();
            const report = await transcriptApi.debugTranscriptExtraction(video);

            console.log('\n=== Debug Analysis Results ===');
            console.log(`Video ID: ${report.videoId}`);

            console.log('\n--- Page Information ---');
            const page = report.pageInfo;
            console.log(`HTML Length: ${page.htmlLength.toLocaleString()} characters`);
            console.log(`Has Player Response: ${glyph(page.hasPlayerResponse)}`);
            console.log(`Has Caption Tracks: ${glyph(page.hasCaptionTracks)}`);
            console.log(`Caption Track Mentions: ${page.captionTrackMatches}`);

            console.log('\n--- Extraction Attempts ---');
            let position = 0;
            for (const attempt of report.extractionAttempts) {
                position += 1;
                console.log(`${position}. ${attempt.method}: ${glyph(attempt.success)}`);
                // The transcript count is only shown for attempts that succeeded.
                if (attempt.success && attempt.dataFound !== undefined) {
                    console.log(` → Found ${attempt.dataFound} transcript(s)`);
                }
                if (attempt.error) {
                    console.log(` → Error: ${attempt.error}`);
                }
            }

            if (report.recommendations.length > 0) {
                console.log('\n--- Recommendations ---');
                let number = 0;
                for (const recommendation of report.recommendations) {
                    number += 1;
                    console.log(`${number}. ${recommendation}`);
                }
            }

            // Closing summary: how many extraction methods produced transcripts.
            const successCount = report.extractionAttempts.filter(a => a.success).length;
            if (successCount > 0) {
                console.log(`\n✅ ${successCount} extraction method(s) succeeded`);
            }
            else {
                console.log('\n❌ No extraction methods succeeded');
                console.log('This video may not have transcripts available, or YouTube has changed their page structure.');
            }
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            console.error('Debug Error:', reason);
            process.exit(1);
        }
    });
commander_1.program.parse(); | ||
//# sourceMappingURL=cli.js.map |
@@ -48,4 +48,6 @@ import { TranscriptEntry, TranscriptList, TranscriptConfig, LanguageCode } from './types'; | ||
private parseCaptionsJson; | ||
private tryAlternativeJsonParsing; | ||
private extractTranscriptData; | ||
private extractTranscriptDataFallback; | ||
private extractTranscriptDataLastResort; | ||
private findTranscript; | ||
@@ -56,3 +58,23 @@ private findBestTranscriptForTranslation; | ||
private parseTranscriptXml; | ||
/** | ||
* Debug method to analyze why transcript extraction might be failing. | ||
* Fetches the watch page for the video and reports facts about the returned HTML, | ||
* the outcome of each caption-track extraction pattern that was tried, and | ||
* human-readable follow-up suggestions. | ||
* | ||
* @param videoUrl - Video reference that is resolved to a video ID before the page is fetched | ||
*   (NOTE(review): accepted input forms depend on the ID extractor - confirm). | ||
* @returns Promise resolving to the diagnostic report described by the inline type below. | ||
* @throws TranscriptRetrievalError when fetching or analyzing the page fails. | ||
*/ | ||
debugTranscriptExtraction(videoUrl: string): Promise<{ | ||
// Video ID resolved from `videoUrl`. | ||
videoId: string; | ||
// Facts gathered from the raw watch-page HTML. | ||
pageInfo: { | ||
// Length of the fetched HTML, in characters. | ||
htmlLength: number; | ||
// Whether the HTML contains the text `ytInitialPlayerResponse`. | ||
hasPlayerResponse: boolean; | ||
// Whether the HTML contains the text `captionTracks`. | ||
hasCaptionTracks: boolean; | ||
// Number of occurrences of `captionTracks` in the HTML. | ||
captionTrackMatches: number; | ||
}; | ||
// One entry per extraction pattern that was tried, in the order tried. | ||
extractionAttempts: { | ||
// Display name of the pattern. | ||
method: string; | ||
// True when the pattern produced at least one parsed transcript. | ||
success: boolean; | ||
// Failure reason, when one was recorded. | ||
error?: string; | ||
// Number of transcripts parsed from the match, when parsing was attempted. | ||
dataFound?: number; | ||
}[]; | ||
// Suggestions derived from the page facts and attempt outcomes. | ||
recommendations: string[]; | ||
}>; | ||
} | ||
//# sourceMappingURL=transcript.d.ts.map |
@@ -166,45 +166,75 @@ "use strict"; | ||
async getTranscriptList(videoId) { | ||
try { | ||
const response = await this.httpClient.get(`https://www.youtube.com/watch?v=${videoId}`); | ||
// Enhanced rate limit detection | ||
if (response.status === 429 || (0, utils_1.detectRateLimitingFromResponse)(response.data, response.status)) { | ||
throw new errors_1.TooManyRequestsError(); | ||
} | ||
const html = response.data; | ||
// Check if video is available | ||
if (this.isVideoUnavailable(html)) { | ||
throw new errors_1.VideoUnavailableError(videoId); | ||
} | ||
// Extract transcript data from page | ||
const transcripts = this.extractTranscriptData(html, videoId); | ||
if (transcripts.length === 0) { | ||
throw new errors_1.TranscriptsDisabledError(videoId); | ||
} | ||
return { | ||
videoId, | ||
transcripts | ||
}; | ||
} | ||
catch (error) { | ||
if (error instanceof errors_1.YouTubeTranscriptError) { | ||
throw error; | ||
} | ||
if (axios_1.default.isAxiosError(error)) { | ||
const maxRetries = 3; | ||
let lastError = null; | ||
for (let attempt = 1; attempt <= maxRetries; attempt++) { | ||
try { | ||
console.log(`[YouTubeTranscriptApi] Attempt ${attempt}/${maxRetries} to fetch transcript list for ${videoId}`); | ||
const response = await this.httpClient.get(`https://www.youtube.com/watch?v=${videoId}`); | ||
// Enhanced rate limit detection | ||
if (error.response?.status === 429 || (0, utils_1.isRateLimitError)(error)) { | ||
if (response.status === 429 || (0, utils_1.detectRateLimitingFromResponse)(response.data, response.status)) { | ||
throw new errors_1.TooManyRequestsError(); | ||
} | ||
// Check response content for rate limiting indicators | ||
if (error.response?.data && | ||
(0, utils_1.detectRateLimitingFromResponse)(error.response.data, error.response.status)) { | ||
const html = response.data; | ||
// Check if video is available | ||
if (this.isVideoUnavailable(html)) { | ||
throw new errors_1.VideoUnavailableError(videoId); | ||
} | ||
// Extract transcript data from page with enhanced debugging | ||
console.log(`[YouTubeTranscriptApi] HTML content length: ${html.length} for ${videoId}`); | ||
const transcripts = this.extractTranscriptData(html, videoId); | ||
if (transcripts.length === 0) { | ||
if (attempt < maxRetries) { | ||
console.warn(`[YouTubeTranscriptApi] No transcripts found on attempt ${attempt}, retrying...`); | ||
// Add a small delay before retrying | ||
await new Promise(resolve => setTimeout(resolve, 1000 * attempt)); | ||
continue; | ||
} | ||
throw new errors_1.TranscriptsDisabledError(videoId); | ||
} | ||
console.log(`[YouTubeTranscriptApi] Successfully extracted ${transcripts.length} transcripts for ${videoId} on attempt ${attempt}`); | ||
return { | ||
videoId, | ||
transcripts | ||
}; | ||
} | ||
catch (error) { | ||
lastError = error; | ||
if (error instanceof errors_1.YouTubeTranscriptError) { | ||
// Don't retry for specific YouTube errors (except TranscriptsDisabledError on early attempts) | ||
if (!(error instanceof errors_1.TranscriptsDisabledError) || attempt === maxRetries) { | ||
throw error; | ||
} | ||
} | ||
if (axios_1.default.isAxiosError(error)) { | ||
// Enhanced rate limit detection | ||
if (error.response?.status === 429 || (0, utils_1.isRateLimitError)(error)) { | ||
throw new errors_1.TooManyRequestsError(); | ||
} | ||
// Check response content for rate limiting indicators | ||
if (error.response?.data && | ||
(0, utils_1.detectRateLimitingFromResponse)(error.response.data, error.response.status)) { | ||
throw new errors_1.TooManyRequestsError(); | ||
} | ||
// For network errors, retry | ||
if (attempt < maxRetries) { | ||
console.warn(`[YouTubeTranscriptApi] Network error on attempt ${attempt}, retrying: ${error.message}`); | ||
await new Promise(resolve => setTimeout(resolve, 1000 * attempt)); | ||
continue; | ||
} | ||
throw new errors_1.TranscriptRetrievalError(videoId, `HTTP ${error.response?.status}: ${error.message}`); | ||
} | ||
// Check if it's a rate limiting error by message | ||
if ((0, utils_1.isRateLimitError)(error)) { | ||
throw new errors_1.TooManyRequestsError(); | ||
} | ||
throw new errors_1.TranscriptRetrievalError(videoId, `HTTP ${error.response?.status}: ${error.message}`); | ||
// For other errors, retry if we have attempts left | ||
if (attempt < maxRetries) { | ||
console.warn(`[YouTubeTranscriptApi] Error on attempt ${attempt}, retrying: ${error.message}`); | ||
await new Promise(resolve => setTimeout(resolve, 1000 * attempt)); | ||
continue; | ||
} | ||
} | ||
// Check if it's a rate limiting error by message | ||
if ((0, utils_1.isRateLimitError)(error)) { | ||
throw new errors_1.TooManyRequestsError(); | ||
} | ||
throw new errors_1.TranscriptRetrievalError(videoId, error.message); | ||
} | ||
// If we've exhausted all retries | ||
throw new errors_1.TranscriptRetrievalError(videoId, lastError?.message || 'Unknown error after all retry attempts'); | ||
} | ||
@@ -223,10 +253,131 @@ isVideoUnavailable(html) { | ||
try { | ||
const captionsData = JSON.parse(jsonString); | ||
if (Array.isArray(captionsData)) { | ||
// Ensure all essential fields are present, especially baseUrl | ||
const validCaptions = captionsData.filter(caption => caption.baseUrl); | ||
if (validCaptions.length !== captionsData.length) { | ||
console.warn(`[YouTubeTranscriptApi] Some caption entries were missing baseUrl for ${videoId}`); | ||
// Clean up the JSON string - remove potential trailing commas and fix common issues | ||
let cleanedJson = jsonString.trim(); | ||
// Fix trailing commas before closing brackets/braces | ||
cleanedJson = cleanedJson.replace(/,(\s*[\]}])/g, '$1'); | ||
// Try to extract just the array part if it's embedded in larger structure | ||
const arrayMatch = cleanedJson.match(/\[[\s\S]*\]/); | ||
if (arrayMatch) { | ||
cleanedJson = arrayMatch[0]; | ||
} | ||
console.log(`[YouTubeTranscriptApi] Attempting to parse captions JSON for ${videoId}. Length: ${cleanedJson.length}`); | ||
const captionsData = JSON.parse(cleanedJson); | ||
if (!Array.isArray(captionsData)) { | ||
console.warn(`[YouTubeTranscriptApi] Parsed caption data is not an array for ${videoId}. Type: ${typeof captionsData}`); | ||
return null; | ||
} | ||
if (captionsData.length === 0) { | ||
console.warn(`[YouTubeTranscriptApi] Parsed caption data is empty array for ${videoId}`); | ||
return null; | ||
} | ||
// Enhanced validation and processing | ||
const validCaptions = captionsData | ||
.filter((caption) => { | ||
// More flexible validation - check for baseUrl or any URL-like field | ||
const hasUrl = caption.baseUrl || caption.url || caption.captionUrl; | ||
if (!hasUrl) { | ||
console.warn(`[YouTubeTranscriptApi] Caption entry missing URL field for ${videoId}:`, caption); | ||
return false; | ||
} | ||
return validCaptions.map((caption) => ({ | ||
return true; | ||
}) | ||
.map((caption) => { | ||
// Extract URL from various possible fields | ||
const url = caption.baseUrl || caption.url || caption.captionUrl; | ||
// Extract language information with fallbacks | ||
const language = caption.languageCode || caption.lang || caption.language || 'unknown'; | ||
// Extract language name with multiple fallback strategies | ||
let languageName = 'Unknown'; | ||
if (caption.name?.simpleText) { | ||
languageName = caption.name.simpleText; | ||
} | ||
else if (caption.name?.runs?.[0]?.text) { | ||
languageName = caption.name.runs[0].text; | ||
} | ||
else if (caption.displayName) { | ||
languageName = caption.displayName; | ||
} | ||
else if (caption.languageName) { | ||
languageName = caption.languageName; | ||
} | ||
else if (language !== 'unknown') { | ||
languageName = language; // Use language code as-is instead of capitalizing | ||
} | ||
// Determine if it's auto-generated with multiple field checks | ||
const isGenerated = caption.kind === 'asr' || | ||
caption.isAutoGenerated === true || | ||
caption.autoGenerated === true || | ||
languageName.toLowerCase().includes('auto'); | ||
// Determine translatability | ||
const isTranslatable = caption.isTranslatable !== false && | ||
caption.translatable !== false; | ||
return { | ||
language, | ||
languageName, | ||
isGenerated, | ||
isTranslatable, | ||
url | ||
}; | ||
}); | ||
if (validCaptions.length !== captionsData.length) { | ||
console.warn(`[YouTubeTranscriptApi] Filtered ${captionsData.length - validCaptions.length} invalid caption entries for ${videoId}`); | ||
} | ||
if (validCaptions.length === 0) { | ||
console.warn(`[YouTubeTranscriptApi] No valid caption entries found after filtering for ${videoId}`); | ||
return null; | ||
} | ||
console.log(`[YouTubeTranscriptApi] Successfully parsed ${validCaptions.length} caption entries for ${videoId}`); | ||
return validCaptions; | ||
} | ||
catch (error) { | ||
console.warn(`[YouTubeTranscriptApi] Failed to parse captions JSON for ${videoId}: ${error.message}`); | ||
console.warn(`[YouTubeTranscriptApi] JSON snippet: ${jsonString.substring(0, 200)}...`); | ||
// Try alternative parsing approaches | ||
return this.tryAlternativeJsonParsing(jsonString, videoId); | ||
} | ||
} | ||
tryAlternativeJsonParsing(jsonString, videoId) { | ||
console.log(`[YouTubeTranscriptApi] Trying alternative JSON parsing approaches for ${videoId}`); | ||
try { | ||
// Approach 1: Try to manually extract individual caption objects | ||
const captionPattern = /\{[^{}]*"baseUrl"[^{}]*\}/g; | ||
const captionMatches = jsonString.match(captionPattern); | ||
if (captionMatches && captionMatches.length > 0) { | ||
const captions = []; | ||
for (const captionMatch of captionMatches) { | ||
try { | ||
const caption = JSON.parse(captionMatch); | ||
if (caption.baseUrl) { | ||
captions.push({ | ||
language: caption.languageCode || 'unknown', | ||
languageName: caption.name?.simpleText || caption.languageCode || 'Unknown', | ||
isGenerated: caption.kind === 'asr', | ||
isTranslatable: !!caption.isTranslatable, | ||
url: caption.baseUrl | ||
}); | ||
} | ||
} | ||
catch (e) { | ||
// Skip invalid individual captions | ||
continue; | ||
} | ||
} | ||
if (captions.length > 0) { | ||
console.log(`[YouTubeTranscriptApi] Alternative parsing extracted ${captions.length} captions for ${videoId}`); | ||
return captions; | ||
} | ||
} | ||
// Approach 2: Try to fix common JSON issues and re-parse | ||
let fixedJson = jsonString; | ||
// Fix unescaped quotes in strings | ||
fixedJson = fixedJson.replace(/"([^"]*)"([^",:}\]]*)"([^"]*)":/g, '"$1\\"$2\\"$3":'); | ||
// Fix missing quotes around property names | ||
fixedJson = fixedJson.replace(/([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:/g, '$1"$2":'); | ||
// Try parsing the fixed JSON | ||
const fixedData = JSON.parse(fixedJson); | ||
if (Array.isArray(fixedData) && fixedData.length > 0) { | ||
console.log(`[YouTubeTranscriptApi] Fixed JSON parsing succeeded for ${videoId}`); | ||
return fixedData | ||
.filter(caption => caption.baseUrl) | ||
.map(caption => ({ | ||
language: caption.languageCode || 'unknown', | ||
@@ -236,41 +387,116 @@ languageName: caption.name?.simpleText || caption.languageCode || 'Unknown', | ||
isTranslatable: !!caption.isTranslatable, | ||
url: caption.baseUrl, | ||
url: caption.baseUrl | ||
})); | ||
} | ||
console.warn(`[YouTubeTranscriptApi] Parsed caption data is not an array for ${videoId}. Data: ${jsonString.substring(0, 100)}`); | ||
return null; | ||
} | ||
catch (error) { | ||
console.warn(`[YouTubeTranscriptApi] Failed to parse captions JSON for ${videoId}: ${error.message}. Data: ${jsonString.substring(0, 100)}`); | ||
return null; | ||
console.warn(`[YouTubeTranscriptApi] Alternative JSON parsing also failed for ${videoId}: ${error.message}`); | ||
} | ||
return null; | ||
} | ||
extractTranscriptData(html, videoId) { | ||
const primaryPattern = /"captionTracks":\s*(\[.*?\])/; | ||
const match = html.match(primaryPattern); | ||
if (match && match[1]) { | ||
const parsed = this.parseCaptionsJson(match[1], videoId); | ||
if (parsed) { | ||
return parsed; | ||
// Enhanced primary pattern with better regex | ||
const patterns = [ | ||
// Primary pattern with more flexible whitespace handling | ||
/"captionTracks":\s*(\[(?:[^\[\]]*(?:\[[^\]]*\])*)*\])/, | ||
// Original simple pattern as fallback | ||
/"captionTracks":\s*(\[.*?\])/, | ||
// Pattern for when captionTracks is nested deeper | ||
/"captionTracks":\s*(\[[\s\S]*?\](?=\s*[,}]))/ | ||
]; | ||
for (let i = 0; i < patterns.length; i++) { | ||
const match = html.match(patterns[i]); | ||
if (match && match[1]) { | ||
console.log(`[YouTubeTranscriptApi] Pattern ${i + 1} matched for ${videoId}`); | ||
const parsed = this.parseCaptionsJson(match[1], videoId); | ||
if (parsed && parsed.length > 0) { | ||
return parsed; | ||
} | ||
} | ||
} | ||
// If primary pattern didn't match, or parsing failed/returned null, try fallbacks | ||
// If primary patterns didn't work, try comprehensive fallbacks | ||
return this.extractTranscriptDataFallback(html, videoId); | ||
} | ||
extractTranscriptDataFallback(html, videoId) { | ||
// Alternative extraction methods | ||
console.log(`[YouTubeTranscriptApi] Trying fallback extraction methods for ${videoId}`); | ||
// Comprehensive fallback patterns for different YouTube page structures | ||
const patterns = [ | ||
/"captions"[\s\S]*?"playerCaptionsTracklistRenderer"[\s\S]*?"captionTracks":\s*(\[[\s\S]*?\])/, | ||
/"captionsInitialState"[\s\S]*?"captionTracks":\s*(\[[\s\S]*?\])/ | ||
// Pattern 1: Standard captions structure | ||
/"captions"[\s\S]*?"playerCaptionsTracklistRenderer"[\s\S]*?"captionTracks":\s*(\[[\s\S]*?\](?=\s*[,}]))/, | ||
// Pattern 2: Initial state structure | ||
/"captionsInitialState"[\s\S]*?"captionTracks":\s*(\[[\s\S]*?\](?=\s*[,}]))/, | ||
// Pattern 3: ytInitialPlayerResponse structure | ||
/"ytInitialPlayerResponse"[\s\S]*?"captions"[\s\S]*?"captionTracks":\s*(\[[\s\S]*?\](?=\s*[,}]))/, | ||
// Pattern 4: Direct search in window object | ||
/window\["ytInitialPlayerResponse"\][\s\S]*?"captionTracks":\s*(\[[\s\S]*?\](?=\s*[,}]))/, | ||
// Pattern 5: Alternative window structure | ||
/var\s+ytInitialPlayerResponse\s*=[\s\S]*?"captionTracks":\s*(\[[\s\S]*?\](?=\s*[,}]))/, | ||
// Pattern 6: Embedded in script tag | ||
/<script[^>]*>[\s\S]*?"captionTracks":\s*(\[[\s\S]*?\](?=\s*[,}]))[\s\S]*?<\/script>/, | ||
// Pattern 7: More relaxed pattern for edge cases | ||
/"captionTracks":\s*(\[[^\]]*(?:\{[^}]*\}[^\]]*)*\])/, | ||
// Pattern 8: Search in any JSON-like structure | ||
/(?:caption|track|subtitle).*?"captionTracks":\s*(\[[\s\S]*?\](?=\s*[,}]))/i | ||
]; | ||
for (const pattern of patterns) { | ||
const match = html.match(pattern); | ||
if (match && match[1]) { | ||
const parsed = this.parseCaptionsJson(match[1], videoId); | ||
if (parsed) { | ||
return parsed; // Return as soon as a fallback pattern works | ||
for (let i = 0; i < patterns.length; i++) { | ||
try { | ||
const match = html.match(patterns[i]); | ||
if (match && match[1]) { | ||
console.log(`[YouTubeTranscriptApi] Fallback pattern ${i + 1} matched for ${videoId}`); | ||
const parsed = this.parseCaptionsJson(match[1], videoId); | ||
if (parsed && parsed.length > 0) { | ||
console.log(`[YouTubeTranscriptApi] Successfully extracted ${parsed.length} transcripts using fallback pattern ${i + 1}`); | ||
return parsed; | ||
} | ||
} | ||
// If parsing this fallback failed, continue to the next pattern | ||
} | ||
catch (error) { | ||
console.warn(`[YouTubeTranscriptApi] Error in fallback pattern ${i + 1} for ${videoId}: ${error.message}`); | ||
continue; | ||
} | ||
} | ||
// Last resort: try to find any caption-related data and extract manually | ||
return this.extractTranscriptDataLastResort(html, videoId); | ||
} | ||
extractTranscriptDataLastResort(html, videoId) { | ||
console.log(`[YouTubeTranscriptApi] Attempting last resort extraction for ${videoId}`); | ||
try { | ||
// Try to find any baseUrl patterns that might indicate captions | ||
const baseUrlPattern = /"baseUrl":\s*"([^"]*timedtext[^"]*)"/g; | ||
const urls = []; | ||
let match; | ||
while ((match = baseUrlPattern.exec(html)) !== null) { | ||
urls.push(match[1]); | ||
} | ||
if (urls.length > 0) { | ||
console.log(`[YouTubeTranscriptApi] Found ${urls.length} potential transcript URLs via last resort method`); | ||
// Try to extract language information from URLs or nearby context | ||
const transcripts = []; | ||
for (const url of urls) { | ||
// Extract language from URL parameters | ||
const langMatch = url.match(/[&?]lang=([^&]*)/); | ||
const tlangMatch = url.match(/[&?]tlang=([^&]*)/); | ||
const language = langMatch ? langMatch[1] : (tlangMatch ? tlangMatch[1] : 'unknown'); | ||
transcripts.push({ | ||
language, | ||
languageName: language.charAt(0).toUpperCase() + language.slice(1), | ||
isGenerated: url.includes('kind=asr') || url.includes('&kind=asr'), | ||
isTranslatable: true, // Assume translatable for last resort | ||
url: url.startsWith('http') ? url : `https://www.youtube.com${url}` | ||
}); | ||
} | ||
return transcripts; | ||
} | ||
// Final attempt: look for any mention of transcript-related data | ||
const fallbackPattern = /(?:transcript|caption|subtitle).*?lang.*?(?:en|es|fr|de|it|pt|ru|ja|ko|zh|ar|hi)/gi; | ||
const fallbackMatches = html.match(fallbackPattern); | ||
if (fallbackMatches && fallbackMatches.length > 0) { | ||
console.log(`[YouTubeTranscriptApi] Found ${fallbackMatches.length} potential transcript indicators via text search`); | ||
// This is a very basic fallback - in practice, you might want to implement more sophisticated parsing | ||
} | ||
} | ||
catch (error) { | ||
console.warn(`[YouTubeTranscriptApi] Error in last resort extraction for ${videoId}: ${error.message}`); | ||
} | ||
console.log(`[YouTubeTranscriptApi] All extraction methods failed for ${videoId}`); | ||
return []; | ||
@@ -360,4 +586,77 @@ } | ||
} | ||
/** | ||
* Debug method to analyze why transcript extraction might be failing | ||
* This method provides detailed information about the page content and extraction attempts | ||
*/ | ||
async debugTranscriptExtraction(videoUrl) { | ||
const videoId = this.extractAndValidateVideoId(videoUrl); | ||
try { | ||
const response = await this.httpClient.get(`https://www.youtube.com/watch?v=${videoId}`); | ||
const html = response.data; | ||
const debug = { | ||
videoId, | ||
pageInfo: { | ||
htmlLength: html.length, | ||
hasPlayerResponse: html.includes('ytInitialPlayerResponse'), | ||
hasCaptionTracks: html.includes('captionTracks'), | ||
captionTrackMatches: (html.match(/captionTracks/g) || []).length | ||
}, | ||
extractionAttempts: [], | ||
recommendations: [] | ||
}; | ||
// Test primary extraction patterns | ||
const primaryPatterns = [ | ||
{ name: 'Standard pattern', pattern: /"captionTracks":\s*(\[(?:[^\[\]]*(?:\[[^\]]*\])*)*\])/ }, | ||
{ name: 'Simple pattern', pattern: /"captionTracks":\s*(\[.*?\])/ }, | ||
{ name: 'Nested pattern', pattern: /"captionTracks":\s*(\[[\s\S]*?\](?=\s*[,}]))/ } | ||
]; | ||
for (const { name, pattern } of primaryPatterns) { | ||
const match = html.match(pattern); | ||
if (match) { | ||
try { | ||
const parsed = this.parseCaptionsJson(match[1], videoId); | ||
debug.extractionAttempts.push({ | ||
method: name, | ||
success: parsed !== null && parsed.length > 0, | ||
dataFound: parsed?.length || 0 | ||
}); | ||
} | ||
catch (error) { | ||
debug.extractionAttempts.push({ | ||
method: name, | ||
success: false, | ||
error: error.message | ||
}); | ||
} | ||
} | ||
else { | ||
debug.extractionAttempts.push({ | ||
method: name, | ||
success: false, | ||
error: 'Pattern did not match' | ||
}); | ||
} | ||
} | ||
// Analyze and provide recommendations | ||
if (!debug.pageInfo.hasPlayerResponse) { | ||
debug.recommendations.push('Page does not contain ytInitialPlayerResponse - video may be private or unavailable'); | ||
} | ||
if (!debug.pageInfo.hasCaptionTracks) { | ||
debug.recommendations.push('No captionTracks found in page - transcripts may be disabled for this video'); | ||
} | ||
if (debug.pageInfo.captionTrackMatches === 0) { | ||
debug.recommendations.push('Try checking if the video has manually uploaded captions or auto-generated captions enabled'); | ||
} | ||
if (debug.extractionAttempts.every(attempt => !attempt.success)) { | ||
debug.recommendations.push('All extraction patterns failed - YouTube may have changed their page structure'); | ||
debug.recommendations.push('Consider reporting this as a bug with the video ID for investigation'); | ||
} | ||
return debug; | ||
} | ||
catch (error) { | ||
throw new errors_1.TranscriptRetrievalError(videoId, `Debug extraction failed: ${error.message}`); | ||
} | ||
} | ||
} | ||
exports.YouTubeTranscriptApi = YouTubeTranscriptApi; | ||
//# sourceMappingURL=transcript.js.map |
{ | ||
"name": "youtube-transcript-js-api", | ||
"version": "1.1.1", | ||
"version": "1.2.0", | ||
"description": "A JavaScript/TypeScript library to fetch YouTube video transcripts", | ||
@@ -40,3 +40,3 @@ "main": "dist/src/index.js", | ||
"engines": { | ||
"node": ">=14.0.0" | ||
"node": ">=18.0.0" | ||
}, | ||
@@ -43,0 +43,0 @@ "devDependencies": { |
@@ -8,3 +8,3 @@ # YouTube Transcript JS API | ||
[](https://opensource.org/licenses/MIT) | ||
[](https://nodejs.org/) | ||
[](https://nodejs.org/) | ||
@@ -61,3 +61,3 @@ A comprehensive TypeScript/JavaScript library to fetch YouTube video transcripts with advanced rate limiting and multiple output formats. This is a complete port of the popular Python library [`youtube-transcript-api`](https://github.com/jdepoix/youtube-transcript-api) with significant enhancements including intelligent rate limiting, user agent rotation, and extensive TypeScript support. | ||
**Requirements**: Node.js 14.0.0 or higher | ||
**Requirements**: Node.js 18.0.0 or higher | ||
@@ -64,0 +64,0 @@ ## Quick Start |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
166165
24.35%2079
21.44%