New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

@freetube/yt-trending-scraper

Package Overview
Dependencies
Maintainers
3
Versions
3
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@freetube/yt-trending-scraper - npm Package Compare versions

Comparing version 2.0.1 to 3.1.0

src/HtmlParser.js

2

index.js

@@ -1,1 +0,1 @@

module.exports = require("./src/Youtube-Scraper")
module.exports = require('./src/Youtube-Scraper')
{
"name": "@freetube/yt-trending-scraper",
"version": "2.0.1",
"version": "3.1.0",
"description": "Identifies the currently trending videos on YouTube and returns all trending site information about every video without accessing the YouTube API.",
"main": "index.js",
"files": [
"index.js",
"src/"
],
"scripts": {
"test": "jest --watchAll --verbose"
"test": "jest --watchAll --verbose --coverage",
"test-ci": "jest --verbose --ci --coverage",
"lint-fix": "eslint --fix --ext .js ./",
"lint": "eslint --ext .js ./"
},
"repository": {
"type": "git",
"url": "https://github.com/GilgusMaximus/yt-trending-scraper.git"
"url": "https://github.com/FreeTubeApp/yt-trending-scraper.git"
},

@@ -34,7 +41,15 @@ "publishConfig": {

"dependencies": {
"axios": "^0.21.1"
"axios": "^0.27.2"
},
"devDependencies": {
"jest": "^27.0.4"
}
"eslint": "^8.22.0",
"eslint-config-prettier": "^8.5.0",
"eslint-config-standard": "^17.0.0",
"eslint-plugin-import": "^2.26.0",
"eslint-plugin-n": "15.2.5",
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-prettier": "^4.2.1",
"eslint-plugin-promise": "^6.0.0",
"jest": "^28.1.3",
"prettier": "^2.7.1" }
}
# YouTube Trending Videos Scraper NodeJS Documentation
This NodeJS library can scrape all available trending pages of YouTube without any API usage. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube-Vue) rewrite but can be used with any other project as well.
This NodeJS library can scrape all available trending pages of YouTube without any API usage. It is developed for and tailored towards easy usage in the [FreeTube](https://github.com/FreeTubeApp/FreeTube) rewrite but can be used with any other project as well.

@@ -16,3 +16,3 @@ Therefore, this library does not require any API keys, with the attached maximum quotas, but instead might take longer to receive the required data.

## API
**scrape_trending_page(_parameters_)**
**scrapeTrendingPage(_parameters_)**
Returns a list of objects containing all the information of the trending videos.

@@ -51,3 +51,3 @@

ytrend.scrape_trending_page(parameters).then((data) =>{
ytrend.scrapeTrendingPage(parameters).then((data) =>{
console.log(data);

@@ -79,2 +79,3 @@ }).catch((error)=>{

isVerified: Boolean,
isVerifiedArist: Boolean
}

@@ -81,0 +82,0 @@

@@ -1,41 +0,33 @@

const axios = require("axios")
const trendingPageBase = "https://youtube.com/feed/trending"
const axios = require('axios')
const trendingPageBase = 'https://www.youtube.com/feed/trending'
// Lookup table from a trending sub-page name ('music', 'gaming', 'movies') to the
// token that requestTrendingPage sends as the `bp` query parameter.
// NOTE(review): the quoted-key and unquoted-key entries carry identical values; they
// appear to be the old and new sides of the version diff shown back to back.
const pageAdditions = {
'music': '4gINGgt5dG1hX2NoYXJ0cw%3D%3D',
'gaming': '4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
'movies': '4gIKGgh0cmFpbGVycw%3D%3D'
music: '4gINGgt5dG1hX2NoYXJ0cw%3D%3D',
gaming: '4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
movies: '4gIKGgh0cmFpbGVycw%3D%3D'
}
/**
 * Fetches the YouTube trending page with axios.
 *
 * requestTrendingPage(geoLocation = null, page):
 *   - when geoLocation is not null, sends it as `gl` together with `persist_gl = 1`
 *   - when page is not 'default', sends pageAdditions[page] as `bp`
 *   - resolves with the axios.get response, or with { error: true, message: e }
 *     when the request throws (the error is returned, not rethrown)
 *
 * NOTE(review): the method signature and body appear twice below; this looks like
 * the old and new sides of the version diff interleaved, so the span is not
 * parsable as a single class in this form.
 */
class TrendingRequester {
static async requestTrendingPage(geoLocation = null, page) {
// NOTE(review): `config` is not passed to either axios.get call visible below.
const config = {
headers: {
'x-youtube-client-name': '1',
'x-youtube-client-version': '2.20180222',
'accept-language': 'en-US,en;q=0.5'
}
}
static async requestTrendingPage(geoLocation = null, page) {
try {
const params = {}
if (geoLocation !== null) {
params.persist_gl = 1
params.gl = geoLocation
}
if (page !== 'default') {
try {
const params = {}
if (geoLocation !== null) {
params['persist_gl'] = 1
params['gl'] = geoLocation
}
if (page !== 'default') {
try {
// NOTE(review): reading a missing key from pageAdditions yields undefined rather
// than throwing, so this catch (and its fallback message) looks unreachable — confirm.
params['bp'] = pageAdditions[page]
} catch (error) {
console.error("Fallback to default trending page because no valid page name was provided:", page)
}
}
return await axios.get(trendingPageBase, {params})
} catch (e) {
// Failures are reported through the return value instead of being rethrown.
return {
error: true,
message: e
}
params.bp = pageAdditions[page]
} catch (error) {
console.error('Fallback to default trending page because no valid page name was provided:', page)
}
}
return await axios.get(trendingPageBase, { params })
} catch (e) {
// Failures are reported through the return value instead of being rethrown.
return {
error: true,
message: e
}
}
}
}
module.exports = TrendingRequester

@@ -1,209 +0,10 @@

const requester = require("./TrendingRequester")
const requester = require('./TrendingRequester')
const htmlParser = require('./HtmlParser')
/**
 * Scrapes the YouTube trending page and converts the JSON embedded in its HTML
 * into a flat list of video entry objects.
 *
 * Every member is static; the class is only used as a namespace.
 */
class YoutubeScraper {
  /**
   * Legacy (snake_case) entry point: requests the trending page and parses it.
   *
   * @param {Object} [parameters] optional settings
   * @param {?string} [parameters.geoLocation=null] forwarded to requester.requestTrendingPage
   * @param {string} [parameters.page='default'] forwarded to requester.requestTrendingPage
   * @param {boolean} [parameters.parseCreatorOnRise=false] also parse horizontal ("creator on the rise") shelves
   * @returns {Promise<Object[]>} the parsed video entries
   * @throws the error reported by the requester when the request failed
   */
  static async scrape_trending_page(parameters) {
    let geoLocation = null
    let page = 'default'
    let parseCreatorOnRise = false
    if (parameters) {
      if ('geoLocation' in parameters) {
        geoLocation = parameters.geoLocation
      }
      if ('page' in parameters) {
        page = parameters.page
      }
      if ('parseCreatorOnRise' in parameters) {
        parseCreatorOnRise = parameters.parseCreatorOnRise
      }
    }
    const requestData = await requester.requestTrendingPage(geoLocation, page)
    // The requester reports failures as { error: true, message } instead of throwing.
    // Surface that error here rather than failing later on the missing `data`.
    if (requestData && requestData.error === true) {
      throw requestData.message instanceof Error
        ? requestData.message
        : new Error(String(requestData.message))
    }
    return this.parse_new_html(requestData.data, parseCreatorOnRise)
  }

  /**
   * Extracts the "sectionListRenderer" JSON from the page HTML and parses every shelf in it.
   *
   * @param {string} html_data raw HTML of the trending page
   * @param {boolean} parseCreatorOnRise forwarded to build_api_output
   * @returns {Object[]} all video entries of all sections, in page order
   * @throws {TypeError} when html_data is not a string
   * @throws {Error} when the section list cannot be located in the HTML
   */
  static parse_new_html(html_data, parseCreatorOnRise) {
    if (typeof html_data !== 'string') {
      throw new TypeError('Expected the trending page HTML to be a string')
    }
    // matches the special setup of the video elements
    const sectionMatch = html_data.match(/"sectionListRenderer".+?(},"tab)/)
    if (sectionMatch === null) {
      throw new Error('Could not find the "sectionListRenderer" data in the trending page HTML')
    }
    // drop the trailing `,"tab` (5 chars) and restore the opening brace to get valid JSON
    const jsonContent = '{' + sectionMatch[0].slice(0, -5)
    const sections = JSON.parse(jsonContent).sectionListRenderer.contents
    // one shared timestamp so every entry of a scrape is relative to the same instant
    const currentTime = Date.now()
    const videos = []
    for (const section of sections) {
      const shelfContent = section.itemSectionRenderer.contents[0].shelfRenderer.content
      videos.push(...this.build_api_output(shelfContent, currentTime, parseCreatorOnRise))
    }
    return videos
  }

  /**
   * Dispatches one shelf to the parser matching its layout.
   *
   * @param {Object} videoList the shelf's `content` object
   * @param {number} currentTime reference timestamp in milliseconds
   * @param {boolean} parseCreatorOnRise whether horizontal shelves should be parsed
   * @returns {Object[]} parsed entries, or [] for layouts that are not handled
   */
  static build_api_output(videoList, currentTime, parseCreatorOnRise) {
    if ('horizontalListRenderer' in videoList && parseCreatorOnRise) {
      // we have a creator on the rise element with other structure
      return this.parse_horizontal_video_section(videoList.horizontalListRenderer.items, currentTime)
    }
    if ('expandedShelfContentsRenderer' in videoList) {
      // normal video section
      return this.parse_normal_video_section(videoList.expandedShelfContentsRenderer.items, currentTime)
    }
    return []
  }

  /**
   * Parses the items of a horizontal shelf (each item wraps a `gridVideoRenderer`).
   *
   * @param {Object[]} videoList shelf items
   * @param {number} currentTime reference timestamp in milliseconds
   * @returns {Object[]} video entries with `isCreatorOnRise: true`
   */
  static parse_horizontal_video_section(videoList, currentTime) {
    return videoList.map((item) => {
      const renderer = item.gridVideoRenderer
      const byline = renderer.shortBylineText.runs[0]
      const publishedText = renderer.publishedTimeText.simpleText
      const timeText = renderer.thumbnailOverlays[0].thumbnailOverlayTimeStatusRenderer.text.simpleText
      // videos without a description have no descriptionSnippet property at all
      const hasDescription = Object.prototype.hasOwnProperty.call(renderer, 'descriptionSnippet')
      return {
        videoId: renderer.videoId,
        title: renderer.title.runs[0].text,
        type: 'video',
        author: byline.text,
        authorId: byline.navigationEndpoint.browseEndpoint.browseId,
        authorUrl: byline.navigationEndpoint.commandMetadata.webCommandMetadata.url,
        videoThumbnails: this.extract_thumbnail_data(renderer.videoId),
        description: hasDescription ? renderer.descriptionSnippet.runs[0].text : '',
        viewCount: this.calculate_view_count(renderer.viewCountText.simpleText),
        published: this.calculate_published(publishedText, currentTime),
        publishedText,
        lengthSeconds: this.calculate_length_in_seconds(timeText),
        liveNow: false,
        paid: false,
        premium: false,
        isUpcoming: false,
        timeText,
        isCreatorOnRise: true
      }
    })
  }

  /**
   * Parses the items of a regular shelf (each item wraps a `videoRenderer`).
   *
   * @param {Object[]} videoList shelf items
   * @param {number} currentTime reference timestamp in milliseconds
   * @returns {Object[]} video entries with `isCreatorOnRise: false` and an `isVerified` flag
   */
  static parse_normal_video_section(videoList, currentTime) {
    return videoList.map((item) => {
      const renderer = item.videoRenderer
      const byline = renderer.longBylineText.runs[0]
      const publishedText = renderer.publishedTimeText.simpleText
      const timeText = renderer.lengthText.simpleText
      // videos without a description have no descriptionSnippet property at all
      const hasDescription = Object.prototype.hasOwnProperty.call(renderer, 'descriptionSnippet')
      return {
        videoId: renderer.videoId,
        title: renderer.title.runs[0].text,
        type: 'video',
        author: byline.text,
        authorId: renderer.ownerText.runs[0].navigationEndpoint.browseEndpoint.browseId,
        authorUrl: byline.navigationEndpoint.commandMetadata.webCommandMetadata.url,
        videoThumbnails: this.extract_thumbnail_data(renderer.videoId),
        description: hasDescription ? renderer.descriptionSnippet.runs[0].text : '',
        // a missing viewCountText is reported as 0 views
        viewCount: ('viewCountText' in renderer) ? this.calculate_view_count(renderer.viewCountText.simpleText) : 0,
        published: this.calculate_published(publishedText, currentTime),
        publishedText,
        lengthSeconds: this.calculate_length_in_seconds(timeText),
        liveNow: false,
        paid: false,
        premium: false,
        isUpcoming: false,
        timeText,
        isCreatorOnRise: false,
        isVerified: ('ownerBadges' in renderer) &&
          renderer.ownerBadges[0].metadataBadgeRenderer.tooltip === 'Verified'
      }
    })
  }

  /**
   * Converts a duration such as "hh:mm:ss" or "mm:ss" into seconds.
   *
   * @param {string} lengthText duration text
   * @returns {number} duration in seconds, or -1 when the text contains no digits
   */
  static calculate_length_in_seconds(lengthText) {
    const parts = lengthText.match(/\d+/g)
    if (parts === null) {
      return -1
    }
    // every further group shifts the running total up by one base-60 position
    return parts.reduce((total, part) => total * 60 + Number(part), 0)
  }

  /**
   * Converts a grouped view count such as "xxx,xxx,xxx views" into a number.
   *
   * @param {string} viewText view count text
   * @returns {number} the view count, or 0 when the text contains no digits
   */
  static calculate_view_count(viewText) {
    const groups = viewText.match(/\d+/g)
    if (groups === null) {
      return 0
    }
    // each digit group is treated as one block of thousands
    return groups.reduce((total, group) => total * 1000 + Number(group), 0)
  }

  /**
   * Estimates the publish timestamp from a relative text such as "3 hours ago".
   * The first number in the text is the amount. Recognised units are second, minute,
   * hour, day, week, month (counted as 30 days) and year (counted as 365 days),
   * singular or plural. Text without a recognised unit is treated as minutes.
   *
   * @param {string} publishText relative publish text
   * @param {number} currentTime reference timestamp in milliseconds
   * @returns {number} estimated publish timestamp in milliseconds, or -1 when the text contains no number
   */
  static calculate_published(publishText, currentTime) {
    const amountMatch = publishText.match(/\d+/)
    if (amountMatch === null) {
      return -1
    }
    const MINUTE = 60 * 1000
    const DAY = 24 * 60 * MINUTE
    const unitLengths = {
      second: 1000,
      minute: MINUTE,
      hour: 60 * MINUTE,
      day: DAY,
      week: 7 * DAY,
      month: 30 * DAY,
      year: 365 * DAY
    }
    // matching the singular stem also covers the plural ("hour" / "hours")
    const unitMatch = publishText.match(/second|minute|hour|day|week|month|year/)
    const unitLength = unitMatch === null ? MINUTE : unitLengths[unitMatch[0]]
    return currentTime - Number(amountMatch[0]) * unitLength
  }

  /**
   * Builds the list of thumbnail descriptors for a video id.
   *
   * @param {string} videoId the video id
   * @returns {Object[]} thumbnail descriptors ({ quality, url, width, height })
   */
  // TODO maxres.jpg does not load even tho it loads in invidious
  static extract_thumbnail_data(videoId) {
    // TODO: make customizable
    const base = `https://i.ytimg.com/vi/${videoId}`
    return [
      this.create_thumbnail_dictionary('maxres', `${base}/maxres.jpg`, 1280, 720),
      this.create_thumbnail_dictionary('maxresdefault', `${base}/maxresdefault.jpg`, 1280, 720),
      this.create_thumbnail_dictionary('sddefault', `${base}/sddefault.jpg`, 640, 480),
      this.create_thumbnail_dictionary('high', `${base}/hqdefault.jpg`, 480, 360),
      this.create_thumbnail_dictionary('medium', `${base}/mqdefault.jpg`, 320, 180),
      this.create_thumbnail_dictionary('default', `${base}/default.jpg`, 120, 90),
      this.create_thumbnail_dictionary('start', `${base}/1.jpg`, 120, 90),
      this.create_thumbnail_dictionary('middle', `${base}/2.jpg`, 120, 90),
      this.create_thumbnail_dictionary('end', `${base}/3.jpg`, 120, 90)
    ]
  }

  /**
   * Creates a single thumbnail descriptor.
   *
   * @param {string} Quality quality label
   * @param {string} Url thumbnail url
   * @param {number} Width width in pixels
   * @param {number} Height height in pixels
   * @returns {{quality: string, url: string, width: number, height: number}}
   */
  static create_thumbnail_dictionary(Quality, Url, Width, Height) {
    return {
      quality: Quality,
      url: Url,
      width: Width,
      height: Height
    }
  }

  /**
   * Entry point: requests the trending page and hands the response body to the HTML parser module.
   *
   * @param {Object} [options]
   * @param {string} [options.page='default'] forwarded to requester.requestTrendingPage
   * @param {?string} [options.geoLocation=null] forwarded to requester.requestTrendingPage
   * @param {boolean} [options.parseCreatorOnRise=false] forwarded to htmlParser.parseNewHtml
   * @returns {Promise<*>} whatever htmlParser.parseNewHtml returns
   */
  // NOTE(review): a failed request yields { error: true, message } from the requester, so
  // `requestData.data` is undefined in that case — confirm htmlParser.parseNewHtml copes with it.
  static async scrapeTrendingPage({ page = 'default', geoLocation = null, parseCreatorOnRise = false } = {}) {
    const requestData = await requester.requestTrendingPage(geoLocation, page)
    return htmlParser.parseNewHtml(requestData.data, parseCreatorOnRise)
  }
}
module.exports = YoutubeScraper

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc