{
		"name": "@freetube/yt-trending-scraper",
		"version": "2.0.1",
		"version": "3.1.0",
		"description": "Identifies the currently trending videos on YouTube and returns all trending site information about every video without accessing the YouTube API.",
		"main": "index.js",
		"files": [
		"index.js",
		"src/"
		],
		"scripts": {
		"test": "jest --watchAll --verbose"
		"test": "jest --watchAll --verbose --coverage",
		"test-ci": "jest --verbose --ci --coverage",
		"lint-fix": "eslint --fix --ext .js ./",
		"lint": "eslint --ext .js ./"
		},
		"repository": {
		"type": "git",
		"url": "https://github.com/GilgusMaximus/yt-trending-scraper.git"
		"url": "https://github.com/FreeTubeApp/yt-trending-scraper.git"
		},
		@@ -34,7 +41,15 @@ "publishConfig": {
		"dependencies": {
		"axios": "^0.21.1"
		"axios": "^0.27.2"
		},
		"devDependencies": {
		"jest": "^27.0.4"
		}
		"eslint": "^8.22.0",
		"eslint-config-prettier": "^8.5.0",
		"eslint-config-standard": "^17.0.0",
		"eslint-plugin-import": "^2.26.0",
		"eslint-plugin-n": "15.2.5",
		"eslint-plugin-node": "^11.1.0",
		"eslint-plugin-prettier": "^4.2.1",
		"eslint-plugin-promise": "^6.0.0",
		"jest": "^28.1.3",
		"prettier": "^2.7.1" }
		}

README.md

		# YouTube Trending Videos Scraper NodeJS Documentation
		This NodeJS library can scrape all available trending pages of YouTube without any API usage. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube-Vue) rewrite but can be used with any other project as well.
		This NodeJS library can scrape all available trending pages of YouTube without any API usage. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube) rewrite but can be used with any other project as well.

		@@ -16,3 +16,3 @@ Therefore, this library does not require any API keys, with the attached maximum quotas, but instead might take longer to receive the required data.
		## API
		scrape_trending_page(_parameters_)
		scrapeTrendingPage(_parameters_)
		Returns a list of objects containing all the information of the trending videos.
		@@ -51,3 +51,3 @@

		ytrend.scrape_trending_page(parameters).then((data) =>{
		ytrend.scrapeTrendingPage(parameters).then((data) =>{
		console.log(data);
		@@ -79,2 +79,3 @@ }).catch((error)=>{
		isVerified: Boolean,
		isVerifiedArist: Boolean
		}
		@@ -81,0 +82,0 @@

src/TrendingRequester.js

		@@ -1,41 +0,33 @@
		const axios = require("axios")
		const trendingPageBase = "https://youtube.com/feed/trending"
		const axios = require('axios')
		const trendingPageBase = 'https://www.youtube.com/feed/trending'
		const pageAdditions = {
		'music': '4gINGgt5dG1hX2NoYXJ0cw%3D%3D',
		'gaming': '4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
		'movies': '4gIKGgh0cmFpbGVycw%3D%3D'
		music: '4gINGgt5dG1hX2NoYXJ0cw%3D%3D',
		gaming: '4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
		movies: '4gIKGgh0cmFpbGVycw%3D%3D'
		}

		class TrendingRequester {
		static async requestTrendingPage(geoLocation = null, page) {
		const config = {
		headers: {
		'x-youtube-client-name': '1',
		'x-youtube-client-version': '2.20180222',
		'accept-language': 'en-US,en;q=0.5'
		}
		}
		static async requestTrendingPage(geoLocation = null, page) {
		try {
		const params = {}
		if (geoLocation !== null) {
		params.persist_gl = 1
		params.gl = geoLocation
		}
		if (page !== 'default') {
		try {
		const params = {}
		if (geoLocation !== null) {
		params['persist_gl'] = 1
		params['gl'] = geoLocation
		}
		if (page !== 'default') {
		try {
		params['bp'] = pageAdditions[page]
		} catch (error) {
		console.error("Fallback to default trending page because no valid page name was provided:", page)
		}
		}
		return await axios.get(trendingPageBase, {params})

		} catch (e) {
		return {
		error: true,
		message: e
		}
		params.bp = pageAdditions[page]
		} catch (error) {
		console.error('Fallback to default trending page because no valid page name was provided:', page)
		}
		}
		return await axios.get(trendingPageBase, { params })
		} catch (e) {
		return {
		error: true,
		message: e
		}
		}
		}
		}
		module.exports = TrendingRequester

213

src/Youtube-Scraper.js

		@@ -1,209 +0,10 @@
		const requester = require("./TrendingRequester")

		const requester = require('./TrendingRequester')
		const htmlParser = require('./HtmlParser')
		class YoutubeScraper {

		//starting point
		static async scrape_trending_page(parameters) {
		let geoLocation = null
		let page = 'default'
		let parseCreatorOnRise = false
		if (parameters) {
		if ('geoLocation' in parameters) {
		geoLocation = parameters.geoLocation
		}
		if ('page' in parameters) {
		page = parameters.page
		}
		if ('parseCreatorOnRise' in parameters) {
		parseCreatorOnRise = parameters.parseCreatorOnRise
		}
		}
		const request_data = await requester.requestTrendingPage(geoLocation, page);
		return this.parse_new_html(request_data.data, parseCreatorOnRise);
		}

		static parse_new_html(html_data, parseCreatorOnRise) {
		// matches the special setup of the video elements
		let jsonContent = '{' + html_data.match(/"sectionListRenderer".+?(},"tab)/)[0]
		// remove the last chars in order to make it valid JSON
		jsonContent = jsonContent.substr(0, jsonContent.length-5)
		const contentArrayJSON = JSON.parse(jsonContent).sectionListRenderer.contents
		let videos = []
		const current_time = Date.now();
		contentArrayJSON.forEach((data) => {
		const videoList = this.build_api_output(data.itemSectionRenderer.contents[0].shelfRenderer.content, current_time, parseCreatorOnRise)
		videos = [...videos, ...videoList]
		})
		return videos
		}
		//access the one video container and build and object with all the data required
		static build_api_output(videoList, currentTime, parseCreatorOnRise){
		if ('horizontalListRenderer' in videoList && parseCreatorOnRise) {
		// we have a creator on the rise element with other structure
		return this.parse_horizontal_video_section(videoList.horizontalListRenderer.items, currentTime)
		} else if('expandedShelfContentsRenderer' in videoList) {
		// normal video section
		return this.parse_normal_video_section(videoList.expandedShelfContentsRenderer.items, currentTime)
		}
		return []
		}

		static parse_horizontal_video_section(videoList, currentTime) {
		const videoEntryList = []
		videoList.forEach((videoRenderer) => {
		videoRenderer = videoRenderer.gridVideoRenderer
		let video_entry = {
		videoId: -1,
		title: "",
		type: "video",
		author: "",
		authorId: "",
		authorUrl: "",
		videoThumbnails: [],
		description: "",
		viewCount: -1,
		published: -1,
		publishedText: "",
		lengthSeconds: -1,
		liveNow: false,
		paid: false,
		premium: false,
		isUpcoming: false,
		timeText: "",
		isCreatorOnRise: true,
		};

		video_entry.videoId = videoRenderer.videoId;
		video_entry.title = videoRenderer.title.runs[0].text;
		video_entry.author = videoRenderer.shortBylineText.runs[0].text;
		video_entry.authorId = videoRenderer.shortBylineText.runs[0].navigationEndpoint.browseEndpoint.browseId;
		video_entry.authorUrl = videoRenderer.shortBylineText.runs[0].navigationEndpoint.commandMetadata.webCommandMetadata.url;
		video_entry.viewCount = this.calculate_view_count(videoRenderer.viewCountText.simpleText);
		video_entry.publishedText = videoRenderer.publishedTimeText.simpleText;
		video_entry.published = this.calculate_published(video_entry.publishedText, currentTime);
		video_entry.timeText = videoRenderer.thumbnailOverlays[0].thumbnailOverlayTimeStatusRenderer.text.simpleText;
		video_entry.lengthSeconds = this.calculate_length_in_seconds(video_entry.timeText);
		video_entry.videoThumbnails = this.extract_thumbnail_data(video_entry.videoId);
		//check whether the property is available, because there can be videos without description which won't have an empty property
		if(videoRenderer.hasOwnProperty("descriptionSnippet")){
		video_entry.description = videoRenderer.descriptionSnippet.runs[0].text;
		}
		videoEntryList.push(video_entry);
		})
		return videoEntryList
		}

		static parse_normal_video_section(videoList, currentTime) {
		const videoEntryList = []
		videoList.forEach((videoRenderer) => {
		videoRenderer = videoRenderer.videoRenderer
		let video_entry = {
		videoId: -1,
		title: "",
		type: "video",
		author: "",
		authorId: "",
		authorUrl: "",
		videoThumbnails: [],
		description: "",
		viewCount: -1,
		published: -1,
		publishedText: "",
		lengthSeconds: -1,
		liveNow: false,
		paid: false,
		premium: false,
		isUpcoming: false,
		timeText: "",
		isCreatorOnRise: false,
		isVerified: false
		};
		video_entry.videoId = videoRenderer.videoId;
		video_entry.title = videoRenderer.title.runs[0].text;
		video_entry.author = videoRenderer.longBylineText.runs[0].text;
		video_entry.authorId = videoRenderer.ownerText.runs[0].navigationEndpoint.browseEndpoint.browseId;
		video_entry.authorUrl = videoRenderer.longBylineText.runs[0].navigationEndpoint.commandMetadata.webCommandMetadata.url;
		video_entry.viewCount = ('viewCountText' in videoRenderer) ? this.calculate_view_count(videoRenderer.viewCountText.simpleText) : 0;
		video_entry.publishedText = videoRenderer.publishedTimeText.simpleText;
		video_entry.published = this.calculate_published(video_entry.publishedText, currentTime);
		video_entry.timeText = videoRenderer.lengthText.simpleText;
		video_entry.lengthSeconds = this.calculate_length_in_seconds(video_entry.timeText);
		video_entry.videoThumbnails = this.extract_thumbnail_data(video_entry.videoId);
		if ('ownerBadges' in videoRenderer) {
		video_entry.isVerified = (videoRenderer.ownerBadges[0].metadataBadgeRenderer.tooltip == 'Verified')
		}

		//check whether the property is available, because there can be videos without description which won't have an empty property
		if(videoRenderer.hasOwnProperty("descriptionSnippet")){
		video_entry.description = videoRenderer.descriptionSnippet.runs[0].text;
		}
		videoEntryList.push(video_entry);
		})
		return videoEntryList
		}

		//calculates the length of the video in seconds as a number from the string "hh:mm:ss"
		static calculate_length_in_seconds(lengthText){
		let length_seconds = 0;
		const hours_minutes_seconds = lengthText.match(/(\d(\d)*)/g);
		// calculate the time in seconds for every entry
		for(let i = hours_minutes_seconds.length-1; i >= 0; i--){
		length_seconds += Math.pow(60, (hours_minutes_seconds.length - i - 1)) * hours_minutes_seconds[i];
		}
		return length_seconds;
		}

		//calculates the number of views from the corresponding string "xxx,xxx,xxx,xxx"
		static calculate_view_count(viewText){
		let view_count = 0;
		const viewers_three_split = viewText.match(/(\d(\d)*)/g);
		for(let i = 0; i < viewers_three_split.length; i++){
		view_count = view_count * 1000 + Number(viewers_three_split[i]);
		}
		return view_count;
		}

		//calculates the rough timestamp of the release - very exact for minutes, medium exact for hours and loosy exact for days
		static calculate_published(publishText, currentTime){
		const time_published_ago = publishText.match(/(\d(\d)*)/g);
		let time_span;
		if(publishText.indexOf("day") > -1){
		// posted x days ago
		time_span = Number(time_published_ago[0]) * 24 * 360 * 1000;
		}else if(publishText.indexOf("hours") > -1){
		// posted x hours ago
		time_span = Number(time_published_ago[0]) * 360 * 1000;
		}else{
		// posted x minutes ago, just in case
		time_span = Number(time_published_ago[0]) * 60 * 1000;
		}
		return currentTime - time_span;
		}

		//creates a list of dictionaries with the relevant data for the different thumbnails
		//TODO maxres.jpg does not load even tho it loads in invidious
		static extract_thumbnail_data(videoId){
		//TODO: make customizable
		return [
		this.create_thumbnail_dictionary("maxres", `https://i.ytimg.com/vi/${videoId}/maxres.jpg`, 1280, 720),
		this.create_thumbnail_dictionary("maxresdefault", `https://i.ytimg.com/vi/${videoId}/maxresdefault.jpg`, 1280, 720),
		this.create_thumbnail_dictionary("sddefault", `https://i.ytimg.com/vi/${videoId}/sddefault.jpg`, 640, 480),
		this.create_thumbnail_dictionary("high", `https://i.ytimg.com/vi/${videoId}/hqdefault.jpg`, 480, 360),
		this.create_thumbnail_dictionary("medium", `https://i.ytimg.com/vi/${videoId}/mqdefault.jpg`, 320, 180),
		this.create_thumbnail_dictionary("default", `https://i.ytimg.com/vi/${videoId}/default.jpg`, 120, 90),
		this.create_thumbnail_dictionary("start", `https://i.ytimg.com/vi/${videoId}/1.jpg`, 120, 90),
		this.create_thumbnail_dictionary("middle", `https://i.ytimg.com/vi/${videoId}/2.jpg`, 120, 90),
		this.create_thumbnail_dictionary("end", `https://i.ytimg.com/vi/${videoId}/3.jpg`, 120, 90),
		];
		}

		static create_thumbnail_dictionary(Quality, Url, Width, Height){
		return{
		quality: Quality,
		url: Url,
		width: Width,
		height: Height
		};
		}
		// starting point
		static async scrapeTrendingPage({ page = 'default', geoLocation = null, parseCreatorOnRise = false } = {}) {
		const requestData = await requester.requestTrendingPage(geoLocation, page)
		return htmlParser.parseNewHtml(requestData.data, parseCreatorOnRise)
		}
		}
		module.exports = YoutubeScraper

test/scraper.test.js

LICENSE

Sorry, the diff of this file is not supported yet

@freetube/yt-trending-scraper - npm Package Compare versions

New alerts

Improved metrics

Worsened metrics

Dependency changes