yt-trending-scraper
Advanced tools
Comparing version 1.0.0 to 1.0.1
module.exports = require("./src/Youtube-Scraper") | ||
// let a = require("./src/Youtube-Scraper") | ||
// async function b() { | ||
// let data = await a.scrape_trending_page() | ||
// | ||
// } | ||
// | ||
// b() |
{ | ||
"name": "yt-trending-scraper", | ||
"version": "1.0.0", | ||
"version": "1.0.1", | ||
"description": "Identifies the currently trending videos on YouTube and returns all trending site information about every video without accessing the YouTube API.", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
# YouTube Trending Videos NodeJS Documentation | ||
This NodeJS library scrapes the trending page of YouTube without any API usaged. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube-Vue) rewrite but can be used with any other project as well. | ||
This NodeJS library scrapes the trending page of YouTube without any API usage. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube-Vue) rewrite but can be used with any other project as well. | ||
Therefore this library does not require any API keys, with the attached maximum quotas, but instead might take longer to receive the required data. | ||
Therefore, this library does not require any API keys, with the attached maximum quotas, but instead might take longer to receive the required data. | ||
The library works as long as YouTube keeps it's Webpage layout the same. Therefore there is **no guarantee** that this library will work at all times. | ||
The library works as long as YouTube keeps its web page layout the same. Therefore, there is **no guarantee** that this library will work at all times. | ||
If this library stops working at some point, please create an issue and let me know so that I can look into it. Pull requests are also welcome in this case. | ||
## Installation | ||
Will follow soon | ||
`npm install yt-trending-scraper` | ||
## Usage | ||
`const ytrend = require("yt-trending-scraper")` | ||
## API | ||
Will follow soon | ||
**scrape_trending_page()** | ||
Returns a list of objects containing all the information of the trending videos. | ||
```javascript | ||
ytrend.scrape_trending_page().then((data) =>{ | ||
console.log(data); | ||
}).catch((error)=>{ | ||
console.log(error); | ||
}); | ||
// The data is a list of objects containing the following attributes: | ||
{ | ||
videoId: String, | ||
title: String, | ||
type: "video", | ||
author: String, | ||
authorId: String, | ||
authorUrl: String, | ||
videoThumbnails: Array[Objects], | ||
description: String, | ||
viewCount: Number, | ||
published: Number as timestamp, | ||
publishedText: String, | ||
lengthSeconds: Number, | ||
timeText: String, | ||
liveNow: false, | ||
paid: false, | ||
premium: false, | ||
isUpcoming: false | ||
} | ||
// The thumbnail objects: | ||
{ | ||
quality: "String", | ||
url: "String", | ||
width: Number, | ||
height: Number | ||
} | ||
``` | ||
## Credits | ||
Thanks to PrestoN for the basic instructions and underlying request code and thanks to ~cadence for the HTML extractor RegEx. |
@@ -6,2 +6,3 @@ const requester = require("./TrendingRequester") | ||
//starting point | ||
static async scrape_trending_page() { | ||
@@ -12,7 +13,11 @@ const request_data = await requester.requestTrendingPage(); | ||
//extract the required JSON object from the HTML data | ||
static parse_html(html_data){ | ||
//TODO Take a look whether a regex that directly filters out the videoRenderers is possible | ||
//Thanks to cadence for the regular expression | ||
const ytInitialData = (html_data.match(/^\s*window\["ytInitialData"\] = (\{.*\});$/m) || [])[1]; | ||
//TODO Take a look whether a regex that directly filters out the videoRenderers is possible | ||
//create a JSON object from the JSON string | ||
const yt_data_json = JSON.parse(ytInitialData); | ||
//extract the video containers | ||
const video_section_renderers = yt_data_json.contents. | ||
@@ -24,2 +29,3 @@ twoColumnBrowseResultsRenderer.tabs[0]. | ||
for(let i = 0; i < video_section_renderers.length; i++){ | ||
//check if the creator of the day is available and skip it - at the moment always section 2 and has 4 videos | ||
if(video_section_renderers.length === 4 && i === 1){ | ||
@@ -31,2 +37,3 @@ continue; | ||
} | ||
// get the current timestamp for calculating the published variable (is in milliseconds) | ||
@@ -42,2 +49,3 @@ const current_time = Date.now(); | ||
//access the one video container and build an object with all the data required | ||
static build_api_output(videoRenderer, currentTime){ | ||
@@ -63,2 +71,3 @@ let video_entry = { | ||
}; | ||
//access the relevant fields of data and calculate missing values | ||
video_entry.videoId = videoRenderer.videoId; | ||
@@ -79,2 +88,3 @@ video_entry.title = videoRenderer.title.runs[0]; | ||
//calculates the length of the video in seconds as a number from the string "hh:mm:ss" | ||
static calculate_length_in_seconds(lengthText){ | ||
@@ -90,2 +100,3 @@ let length_seconds = 0; | ||
//calculates the number of views from the corresponding string "xxx,xxx,xxx,xxx" | ||
static calculate_view_count(viewText){ | ||
@@ -100,2 +111,3 @@ let view_count = 0; | ||
//calculates the rough timestamp of the release - very accurate for minutes, moderately accurate for hours and only loosely accurate for days | ||
static calculate_published(publishText, currentTime){ | ||
@@ -117,2 +129,4 @@ const time_published_ago = publishText.match(/(\d(\d)*)/g); | ||
//creates a list of dictionaries with the relevant data for the different thumbnails | ||
//TODO maxres.jpg does not load even though it loads in Invidious | ||
static extract_thumbnail_data(videoId){ | ||
@@ -119,0 +133,0 @@ //TODO: make customizable |
46502
155
56