yt-trending-scraper - npm Package Compare versions

yt-trending-scraper

Package Overview

Dependencies

Maintainers

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 1.0.0 to 1.0.1

index.js

		module.exports = require("./src/Youtube-Scraper")
		// let a = require("./src/Youtube-Scraper")
		// async function b() {
		// let data = await a.scrape_trending_page()
		//
		// }
		//
		// b()

package.json

		{
		"name": "yt-trending-scraper",
		"version": "1.0.0",
		"version": "1.0.1",
		"description": "Identifies the currently trending videos on YouTube and returns all trending site information about every video without accessing the YouTube API.",
		@@ -5,0 +5,0 @@ "main": "index.js",

README.md

		# YouTube Trending Videos NodeJS Documentation
		This NodeJS library scrapes the trending page of YouTube without any API usaged. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube-Vue) rewrite but can be used with any other project as well.
		This NodeJS library scrapes the trending page of YouTube without any API usage. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube-Vue) rewrite but can be used with any other project as well.

		Therefore this library does not require any API keys, with the attached maximum quotas, but instead might take longer to receive the required data.
		Therefore, this library does not require any API keys, with the attached maximum quotas, but instead might take longer to receive the required data.

		The library works as long as YouTube keeps it's Webpage layout the same. Therefore there is no guarantee that this library will work at all times.
		The library works as long as YouTube keeps its web page layout the same. Therefore, there is no guarantee that this library will work at all times.
		If this library stops working at some point, please create an issue and let me know so that I can look into it. Pull requests are also welcome in this case.

		## Installation
		Will follow soon
		`npm install yt-trending-scraper`

		## Usage
		`const ytrend = require("yt-trending-scraper")`

		## API
		Will follow soon
		scrape_trending_page()
		Returns a list of objects containing all the information of the trending videos.
		```javascript
		ytrend.scrape_trending_page().then((data) =>{
		console.log(data);
		}).catch((error)=>{
		console.log(error);
		});

		// The data is a list of objects containing the following attributes:
		{
		videoId: String,
		title: String,
		type: "video",
		author: String,
		authorId: String,
		authorUrl: String,
		videoThumbnails: Array[Objects],
		description: String,
		viewCount: Number,
		published: Number as timestamp,
		publishedText: String,
		lengthSeconds: Number,
		timeText: String,
		liveNow: false,
		paid: false,
		premium: false,
		isUpcoming: false
		}

		// The thumbnail objects:
		{
		quality: "String",
		url: "String",
		width: Number,
		height: Number
		}
		```
		## Credits
		Thanks to PrestoN for the basic instructions and underlying request code and thanks to ~cadence for the HTML extractor RegEx.

src/Youtube-Scraper.js

		@@ -6,2 +6,3 @@ const requester = require("./TrendingRequester")

		//starting point
		static async scrape_trending_page() {
		@@ -12,7 +13,11 @@ const request_data = await requester.requestTrendingPage();

		//extract the required JSON object from the HTML data
		static parse_html(html_data){
		//TODO Take a look whether a regex that directly filters out the videoRenderers is possible
		//Thanks to cadence for the Regex expression
		const ytInitialData = (html_data.match(/^\swindow\["ytInitialData"\] = (\{.\});$/m) \|\| [])[1];
		//TODO Take a look whether a regex that directly filters out the videoRenderers is possible

		//create a JSON object from the JSON string
		const yt_data_json = JSON.parse(ytInitialData);
		//extract the video containers
		const video_section_renderers = yt_data_json.contents.
		@@ -24,2 +29,3 @@ twoColumnBrowseResultsRenderer.tabs[0].
		for(let i = 0; i < video_section_renderers.length; i++){
		//check if the creator of the day is available and skip it - at the moment always section 2 and has 4 videos
		if(video_section_renderers.length === 4 && i === 1){
		@@ -31,2 +37,3 @@ continue;
		}

		// get the current timestamp for calculating the published variable (is in milliseconds)
		@@ -42,2 +49,3 @@ const current_time = Date.now();

		//access the one video container and build an object with all the data required
		static build_api_output(videoRenderer, currentTime){
		@@ -63,2 +71,3 @@ let video_entry = {
		};
		//access the relevant fields of data and calculate missing values
		video_entry.videoId = videoRenderer.videoId;
		@@ -79,2 +88,3 @@ video_entry.title = videoRenderer.title.runs[0];

		//calculates the length of the video in seconds as a number from the string "hh:mm:ss"
		static calculate_length_in_seconds(lengthText){
		@@ -90,2 +100,3 @@ let length_seconds = 0;

		//calculates the number of views from the corresponding string "xxx,xxx,xxx,xxx"
		static calculate_view_count(viewText){
		@@ -100,2 +111,3 @@ let view_count = 0;

		//calculates the rough timestamp of the release - very exact for minutes, moderately exact for hours and only loosely exact for days
		static calculate_published(publishText, currentTime){
		@@ -117,2 +129,4 @@ const time_published_ago = publishText.match(/(\d(\d)*)/g);

		//creates a list of dictionaries with the relevant data for the different thumbnails
		//TODO maxres.jpg does not load even though it loads in Invidious
		static extract_thumbnail_data(videoId){
		@@ -119,0 +133,0 @@ //TODO: make customizable

Improved metrics