@freetube/yt-trending-scraper
Advanced tools
Comparing version 2.0.1 to 3.1.0
@@ -1,1 +0,1 @@ | ||
module.exports = require("./src/Youtube-Scraper") | ||
module.exports = require('./src/Youtube-Scraper') |
{ | ||
"name": "@freetube/yt-trending-scraper", | ||
"version": "2.0.1", | ||
"version": "3.1.0", | ||
"description": "Identifies the currently trending videos on YouTube and returns all trending site information about every video without accessing the YouTube API.", | ||
"main": "index.js", | ||
"files": [ | ||
"index.js", | ||
"src/" | ||
], | ||
"scripts": { | ||
"test": "jest --watchAll --verbose" | ||
"test": "jest --watchAll --verbose --coverage", | ||
"test-ci": "jest --verbose --ci --coverage", | ||
"lint-fix": "eslint --fix --ext .js ./", | ||
"lint": "eslint --ext .js ./" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/GilgusMaximus/yt-trending-scraper.git" | ||
"url": "https://github.com/FreeTubeApp/yt-trending-scraper.git" | ||
}, | ||
@@ -34,7 +41,15 @@ "publishConfig": { | ||
"dependencies": { | ||
"axios": "^0.21.1" | ||
"axios": "^0.27.2" | ||
}, | ||
"devDependencies": { | ||
"jest": "^27.0.4" | ||
} | ||
"eslint": "^8.22.0", | ||
"eslint-config-prettier": "^8.5.0", | ||
"eslint-config-standard": "^17.0.0", | ||
"eslint-plugin-import": "^2.26.0", | ||
"eslint-plugin-n": "15.2.5", | ||
"eslint-plugin-node": "^11.1.0", | ||
"eslint-plugin-prettier": "^4.2.1", | ||
"eslint-plugin-promise": "^6.0.0", | ||
"jest": "^28.1.3", | ||
"prettier": "^2.7.1" } | ||
} |
# YouTube Trending Videos Scraper NodeJS Documentation | ||
This NodeJS library can scrape all available trending pages of YouTube without any API usage. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube-Vue) rewrite but can be used with any other project as well. | ||
This NodeJS library can scrape all available trending pages of YouTube without any API usage. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube) rewrite but can be used with any other project as well. | ||
@@ -16,3 +16,3 @@ Therefore, this library does not require any API keys, with the attached maximum quotas, but instead might take longer to receive the required data. | ||
## API | ||
**scrape_trending_page(_parameters_)** | ||
**scrapeTrendingPage(_parameters_)** | ||
Returns a list of objects containing all the information of the trending videos. | ||
@@ -51,3 +51,3 @@ | ||
ytrend.scrape_trending_page(parameters).then((data) =>{ | ||
ytrend.scrapeTrendingPage(parameters).then((data) =>{ | ||
console.log(data); | ||
@@ -79,2 +79,3 @@ }).catch((error)=>{ | ||
isVerified: Boolean, | ||
isVerifiedArtist: Boolean | ||
} | ||
@@ -81,0 +82,0 @@ |
@@ -1,41 +0,33 @@ | ||
const axios = require("axios") | ||
const trendingPageBase = "https://youtube.com/feed/trending" | ||
const axios = require('axios') | ||
const trendingPageBase = 'https://www.youtube.com/feed/trending' | ||
// Maps each non-default trending page name to the value sent as the `bp`
// query parameter when that page is requested.
// Frozen because it is a shared lookup table that is only ever read.
const pageAdditions = Object.freeze({
  music: '4gINGgt5dG1hX2NoYXJ0cw%3D%3D',
  gaming: '4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
  movies: '4gIKGgh0cmFpbGVycw%3D%3D'
})
/**
 * Performs the HTTP request for the YouTube trending page.
 */
class TrendingRequester {
  /**
   * Requests the trending page, optionally for a specific region and category page.
   *
   * @param {string|null} [geoLocation=null] Region code sent as the `gl` query
   *   parameter (together with `persist_gl=1`); `null` sends neither.
   * @param {string} [page='default'] Name of the trending page. Any key of
   *   `pageAdditions` selects that page; `'default'` or an unknown name requests
   *   the default trending page.
   * @returns {Promise<object>} The axios response on success, otherwise
   *   `{ error: true, message: <thrown error> }`. This method never rejects
   *   because of a failed request.
   */
  static async requestTrendingPage(geoLocation = null, page = 'default') {
    const params = {}
    if (geoLocation !== null) {
      params.persist_gl = 1
      params.gl = geoLocation
    }
    if (page !== 'default') {
      // A plain property lookup never throws for an unknown key, so the page name
      // has to be validated explicitly. The own-property check also keeps
      // inherited names such as 'constructor' from being sent as `bp`.
      if (Object.prototype.hasOwnProperty.call(pageAdditions, page)) {
        params.bp = pageAdditions[page]
      } else {
        console.error('Fallback to default trending page because no valid page name was provided:', page)
      }
    }
    try {
      return await axios.get(trendingPageBase, { params })
    } catch (e) {
      // Callers receive the failure as a value and must check `error` themselves.
      return {
        error: true,
        message: e
      }
    }
  }
}
module.exports = TrendingRequester |
@@ -1,209 +0,10 @@ | ||
const requester = require("./TrendingRequester") | ||
const requester = require('./TrendingRequester') | ||
const htmlParser = require('./HtmlParser') | ||
/**
 * Scrapes the YouTube trending page and turns the embedded page data into a
 * flat list of video entries.
 */
class YoutubeScraper {
  /**
   * Legacy (snake_case) entry point.
   *
   * @param {object} [parameters] Optional settings.
   * @param {string|null} [parameters.geoLocation] Region passed to the requester (default `null`).
   * @param {string} [parameters.page] Trending page name (default `'default'`).
   * @param {boolean} [parameters.parseCreatorOnRise] Whether horizontal
   *   ("creator on the rise") sections are parsed as well (default `false`).
   * @returns {Promise<object[]>} The parsed video entries.
   * @throws {Error} When the request failed or the page could not be parsed.
   */
  static async scrape_trending_page(parameters) {
    let geoLocation = null
    let page = 'default'
    let parseCreatorOnRise = false
    if (parameters) {
      if ('geoLocation' in parameters) {
        geoLocation = parameters.geoLocation
      }
      if ('page' in parameters) {
        page = parameters.page
      }
      if ('parseCreatorOnRise' in parameters) {
        parseCreatorOnRise = parameters.parseCreatorOnRise
      }
    }
    const requestData = await requester.requestTrendingPage(geoLocation, page)
    YoutubeScraper.ensureRequestSucceeded(requestData)
    return this.parse_new_html(requestData.data, parseCreatorOnRise)
  }

  /**
   * Rethrows the failure that the requester reports as a value
   * (`{ error: true, message }`), so callers get the real cause instead of a
   * TypeError raised later while parsing `undefined`.
   *
   * @param {object} requestData Result of `requester.requestTrendingPage`.
   * @throws {Error} The reported error, wrapped in an `Error` if it is not one already.
   */
  static ensureRequestSucceeded(requestData) {
    if (requestData && requestData.error === true) {
      const cause = requestData.message
      throw cause instanceof Error ? cause : new Error(String(cause))
    }
  }

  /**
   * Extracts the `sectionListRenderer` JSON from the page HTML and parses every
   * section it contains.
   *
   * @param {string} html_data Raw HTML of the trending page.
   * @param {boolean} parseCreatorOnRise Whether horizontal sections are parsed.
   * @returns {object[]} All video entries of all sections, in page order.
   * @throws {Error} When the HTML does not contain the expected data.
   */
  static parse_new_html(html_data, parseCreatorOnRise) {
    // matches the special setup of the video elements
    const match = html_data.match(/"sectionListRenderer".+?(},"tab)/)
    if (match === null) {
      throw new Error('Could not find "sectionListRenderer" in the trending page HTML')
    }
    // remove the trailing `,"tab` (5 chars) in order to make it valid JSON
    const jsonContent = `{${match[0]}`.slice(0, -5)
    const sections = JSON.parse(jsonContent).sectionListRenderer.contents
    const currentTime = Date.now()
    const videos = []
    sections.forEach((section) => {
      const shelfContent = section.itemSectionRenderer.contents[0].shelfRenderer.content
      videos.push(...this.build_api_output(shelfContent, currentTime, parseCreatorOnRise))
    })
    return videos
  }

  /**
   * Dispatches one shelf to the parser matching its renderer type.
   *
   * @param {object} videoList The `content` object of a shelf renderer.
   * @param {number} currentTime Reference timestamp in milliseconds.
   * @param {boolean} parseCreatorOnRise Whether horizontal sections are parsed.
   * @returns {object[]} The entries of the shelf, or `[]` for an unhandled shelf type.
   */
  static build_api_output(videoList, currentTime, parseCreatorOnRise) {
    if ('horizontalListRenderer' in videoList && parseCreatorOnRise) {
      // we have a creator on the rise element with other structure
      return this.parse_horizontal_video_section(videoList.horizontalListRenderer.items, currentTime)
    }
    if ('expandedShelfContentsRenderer' in videoList) {
      // normal video section
      return this.parse_normal_video_section(videoList.expandedShelfContentsRenderer.items, currentTime)
    }
    return []
  }

  /**
   * Parses the items of a horizontal ("creator on the rise") section.
   *
   * @param {object[]} videoList Items that each wrap a `gridVideoRenderer`.
   * @param {number} currentTime Reference timestamp in milliseconds.
   * @returns {object[]} One entry per item, with `isCreatorOnRise: true`.
   */
  static parse_horizontal_video_section(videoList, currentTime) {
    return videoList.map((item) => {
      const renderer = item.gridVideoRenderer
      const byline = renderer.shortBylineText.runs[0]
      const entry = {
        videoId: -1,
        title: '',
        type: 'video',
        author: '',
        authorId: '',
        authorUrl: '',
        videoThumbnails: [],
        description: '',
        viewCount: -1,
        published: -1,
        publishedText: '',
        lengthSeconds: -1,
        liveNow: false,
        paid: false,
        premium: false,
        isUpcoming: false,
        timeText: '',
        isCreatorOnRise: true
      }
      entry.videoId = renderer.videoId
      entry.title = renderer.title.runs[0].text
      entry.author = byline.text
      entry.authorId = byline.navigationEndpoint.browseEndpoint.browseId
      entry.authorUrl = byline.navigationEndpoint.commandMetadata.webCommandMetadata.url
      entry.viewCount = this.calculate_view_count(renderer.viewCountText.simpleText)
      entry.publishedText = renderer.publishedTimeText.simpleText
      entry.published = this.calculate_published(entry.publishedText, currentTime)
      entry.timeText = renderer.thumbnailOverlays[0].thumbnailOverlayTimeStatusRenderer.text.simpleText
      entry.lengthSeconds = this.calculate_length_in_seconds(entry.timeText)
      entry.videoThumbnails = this.extract_thumbnail_data(entry.videoId)
      // videos without a description have no descriptionSnippet property at all
      if ('descriptionSnippet' in renderer) {
        entry.description = renderer.descriptionSnippet.runs[0].text
      }
      return entry
    })
  }

  /**
   * Parses the items of a normal (vertical) video section.
   *
   * @param {object[]} videoList Items that each wrap a `videoRenderer`.
   * @param {number} currentTime Reference timestamp in milliseconds.
   * @returns {object[]} One entry per item, with `isCreatorOnRise: false`.
   */
  static parse_normal_video_section(videoList, currentTime) {
    return videoList.map((item) => {
      const renderer = item.videoRenderer
      const byline = renderer.longBylineText.runs[0]
      const entry = {
        videoId: -1,
        title: '',
        type: 'video',
        author: '',
        authorId: '',
        authorUrl: '',
        videoThumbnails: [],
        description: '',
        viewCount: -1,
        published: -1,
        publishedText: '',
        lengthSeconds: -1,
        liveNow: false,
        paid: false,
        premium: false,
        isUpcoming: false,
        timeText: '',
        isCreatorOnRise: false,
        isVerified: false
      }
      entry.videoId = renderer.videoId
      entry.title = renderer.title.runs[0].text
      entry.author = byline.text
      entry.authorId = renderer.ownerText.runs[0].navigationEndpoint.browseEndpoint.browseId
      entry.authorUrl = byline.navigationEndpoint.commandMetadata.webCommandMetadata.url
      entry.viewCount = ('viewCountText' in renderer) ? this.calculate_view_count(renderer.viewCountText.simpleText) : 0
      entry.publishedText = renderer.publishedTimeText.simpleText
      entry.published = this.calculate_published(entry.publishedText, currentTime)
      entry.timeText = renderer.lengthText.simpleText
      entry.lengthSeconds = this.calculate_length_in_seconds(entry.timeText)
      entry.videoThumbnails = this.extract_thumbnail_data(entry.videoId)
      if ('ownerBadges' in renderer) {
        entry.isVerified = renderer.ownerBadges[0].metadataBadgeRenderer.tooltip === 'Verified'
      }
      // videos without a description have no descriptionSnippet property at all
      if ('descriptionSnippet' in renderer) {
        entry.description = renderer.descriptionSnippet.runs[0].text
      }
      return entry
    })
  }

  /**
   * Calculates the length of a video in seconds from a string like "hh:mm:ss".
   *
   * @param {string} lengthText Length text; the rightmost number is seconds.
   * @returns {number} The length in seconds, or -1 (the "unknown" value used by
   *   the entry template) when the text contains no digits.
   */
  static calculate_length_in_seconds(lengthText) {
    const parts = lengthText.match(/\d+/g)
    if (parts === null) {
      return -1
    }
    // every step to the left is worth 60 times more (seconds -> minutes -> hours)
    return parts.reduce((total, part) => total * 60 + Number(part), 0)
  }

  /**
   * Calculates the number of views from a string like "xxx,xxx,xxx views".
   *
   * @param {string} viewText View count text with digit groups of three.
   * @returns {number} The view count, or 0 when the text contains no digits.
   */
  static calculate_view_count(viewText) {
    const groups = viewText.match(/\d+/g)
    if (groups === null) {
      return 0
    }
    return groups.reduce((total, group) => total * 1000 + Number(group), 0)
  }

  /**
   * Calculates the rough release timestamp from a relative text such as
   * "3 days ago". The coarser the unit, the less exact the result; months and
   * years are approximated as 30 and 365 days.
   *
   * @param {string} publishText Relative publish text.
   * @param {number} currentTime Reference timestamp in milliseconds.
   * @returns {number} Approximate publish timestamp in milliseconds, or -1 when
   *   the text contains no number.
   */
  static calculate_published(publishText, currentTime) {
    const MS_PER_DAY = 24 * 3600 * 1000
    const msPerUnit = {
      second: 1000,
      minute: 60 * 1000,
      hour: 3600 * 1000,
      day: MS_PER_DAY,
      week: 7 * MS_PER_DAY,
      month: 30 * MS_PER_DAY,
      year: 365 * MS_PER_DAY
    }
    const amountMatch = publishText.match(/\d+/)
    if (amountMatch === null) {
      return -1
    }
    // matching the singular stem also covers the plural ("hour" / "hours")
    const unitMatch = publishText.match(/second|minute|hour|day|week|month|year/)
    // unknown unit: treat the number as minutes, just in case
    const unitMs = unitMatch === null ? msPerUnit.minute : msPerUnit[unitMatch[0]]
    return currentTime - Number(amountMatch[0]) * unitMs
  }

  /**
   * Creates the list of thumbnail descriptions for a video.
   * TODO maxres.jpg does not load even tho it loads in invidious
   * TODO: make customizable
   *
   * @param {string} videoId Id of the video.
   * @returns {object[]} Thumbnail dictionaries (`quality`, `url`, `width`, `height`).
   */
  static extract_thumbnail_data(videoId) {
    const base = `https://i.ytimg.com/vi/${videoId}`
    return [
      this.create_thumbnail_dictionary('maxres', `${base}/maxres.jpg`, 1280, 720),
      this.create_thumbnail_dictionary('maxresdefault', `${base}/maxresdefault.jpg`, 1280, 720),
      this.create_thumbnail_dictionary('sddefault', `${base}/sddefault.jpg`, 640, 480),
      this.create_thumbnail_dictionary('high', `${base}/hqdefault.jpg`, 480, 360),
      this.create_thumbnail_dictionary('medium', `${base}/mqdefault.jpg`, 320, 180),
      this.create_thumbnail_dictionary('default', `${base}/default.jpg`, 120, 90),
      this.create_thumbnail_dictionary('start', `${base}/1.jpg`, 120, 90),
      this.create_thumbnail_dictionary('middle', `${base}/2.jpg`, 120, 90),
      this.create_thumbnail_dictionary('end', `${base}/3.jpg`, 120, 90)
    ]
  }

  /**
   * Builds one thumbnail dictionary.
   *
   * @param {string} Quality Quality label.
   * @param {string} Url Image URL.
   * @param {number} Width Width in pixels.
   * @param {number} Height Height in pixels.
   * @returns {{quality: string, url: string, width: number, height: number}}
   */
  static create_thumbnail_dictionary(Quality, Url, Width, Height) {
    return {
      quality: Quality,
      url: Url,
      width: Width,
      height: Height
    }
  }

  /**
   * Starting point: requests the trending page and hands the HTML to the parser.
   *
   * @param {object} [options]
   * @param {string} [options.page='default'] Trending page name.
   * @param {string|null} [options.geoLocation=null] Region passed to the requester.
   * @param {boolean} [options.parseCreatorOnRise=false] Whether horizontal sections are parsed.
   * @returns {Promise<object[]>} The parsed video entries.
   * @throws {Error} When the request failed or the page could not be parsed.
   */
  static async scrapeTrendingPage({ page = 'default', geoLocation = null, parseCreatorOnRise = false } = {}) {
    const requestData = await requester.requestTrendingPage(geoLocation, page)
    // referenced by class name so the method keeps working when detached from the class
    YoutubeScraper.ensureRequestSucceeded(requestData)
    return htmlParser.parseNewHtml(requestData.data, parseCreatorOnRise)
  }
}
module.exports = YoutubeScraper |
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
90
49595
10
223
2
+ Added asynckit@0.4.0 (transitive)
+ Added axios@0.27.2 (transitive)
+ Added combined-stream@1.0.8 (transitive)
+ Added delayed-stream@1.0.0 (transitive)
+ Added form-data@4.0.1 (transitive)
+ Added mime-db@1.52.0 (transitive)
+ Added mime-types@2.1.35 (transitive)
- Removed axios@0.21.4 (transitive)
Updated axios@^0.27.2