You're Invited:Meet the Socket Team at BlackHat and DEF CON in Las Vegas, Aug 4-6.RSVP
Socket
Book a DemoInstallSign in
Socket

js-imdb-scraper

Package Overview
Dependencies
Maintainers
1
Versions
12
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

js-imdb-scraper - npm Package Compare versions

Comparing version

to
0.1.1

197

imdbScraper.js
import fetch from 'node-fetch';
import cheerio from 'cheerio';
import {bufferCount, firstValueFrom, from, map, mergeMap} from "rxjs";
// Request headers sent with the IMDb page fetches that pass `headers: HEADER`.
// The only entry is a desktop Chrome user-agent string; the key is listed once
// (a repeated key in an object literal is silently overwritten).
const HEADER = {
  'user-agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
};

@@ -11,3 +12,3 @@

/**
 * Encodes a show name for use as the value of a URL query parameter.
 *
 * Every space becomes `+` (the previous `replace(' ', '+')` only converted the
 * first one), and reserved characters such as `&`, `#` or `+` are
 * percent-encoded so they cannot terminate or alter the query string.
 *
 * @param {string} showName - Human-readable show name, e.g. "game of thrones".
 * @returns {string} The encoded value, e.g. "game+of+thrones".
 */
const convertToQueryString = (showName) => {
  // encodeURIComponent emits %20 for spaces; the query convention used here is '+'.
  return encodeURIComponent(showName).replace(/%20/g, '+');
};

@@ -18,13 +19,12 @@

/**
 * Downloads the IMDb "find" page for a show name.
 *
 * @param {string} showName - Show name to search for.
 * @returns {Promise<string|null>} The response body as text, or `null` when
 *   the request or body read throws (the error is logged to the console).
 */
const getImdbSearchPage = async (showName) => {
  const showNameQuery = convertToQueryString(showName);
  try {
    const result = await fetch(`https://www.imdb.com/find?q=${showNameQuery}`, {
      headers: HEADER,
    });
    return await result.text();
  } catch (err) {
    // Best-effort: callers treat null as "search page unavailable".
    console.error(err);
    return null;
  }
};

@@ -35,25 +35,25 @@

/**
 * Extracts TV-series entries from the HTML of an IMDb search page.
 *
 * Rows are read from `table.findList > tbody > tr`. A row is kept only when
 * its link points at a `/title/tt…` page, its text contains " (TV Series)",
 * and it is not marked "(TV Episode)".
 *
 * @param {string} resultBody - HTML of the search page.
 * @returns {Array<{title: string, id: string, img: (string|null)}>} One entry
 *   per matching row; `img` is `null` when the row has no poster `src`.
 */
const getImdbResults = (resultBody) => {
  const $ = cheerio.load(resultBody);
  return $('table.findList > tbody > tr')
    .map((i, e) => {
      const row = $(e);
      const url = row.children('td').find('a').attr('href');
      const title = row.text().trim();

      // Rows without a link cannot be identified; skip them instead of
      // throwing on `undefined.includes`. Returning null makes cheerio's
      // map() drop the row.
      if (typeof url !== 'string') {
        return null;
      }

      // Capture the whole id rather than a fixed 9 characters, so ids with
      // more than seven digits (e.g. tt10986410) are not truncated.
      const idMatch = /\/title\/(tt\d+)/.exec(url);
      const isSeries =
        title.includes(' (TV Series)') && !title.includes('(TV Episode)');
      if (idMatch === null || !isSeries) {
        return null;
      }

      const imgSrc = row.children('td.primary_photo').find('img').attr('src');
      return {
        title: title.replace(' (TV Series)', ''),
        id: idMatch[1],
        img: typeof imgSrc === 'string' ? getHighQualityImage(imgSrc) : null,
      };
    })
    .get();
};

@@ -64,3 +64,3 @@

/**
 * Rewrites an IMDb thumbnail URL to use the fixed
 * `_V1_UY268_CR8,0,182,268_AL_` variant suffix.
 * NOTE(review): presumably this requests a larger 182x268 rendition — confirm
 * against IMDb's image URL scheme.
 *
 * Everything after the first `@.` in the URL is replaced by the suffix.
 *
 * @param {string} imgUrl - Thumbnail URL taken from a search-result row.
 * @returns {string|null} The rewritten URL; the input unchanged when it has
 *   no `@.` marker (appending the suffix there would corrupt the URL); or
 *   `null` when `imgUrl` is not a string.
 */
const getHighQualityImage = (imgUrl) => {
  if (typeof imgUrl !== 'string') {
    return null;
  }
  if (!imgUrl.includes('@.')) {
    return imgUrl;
  }
  return `${imgUrl.split('@.')[0]}@._V1_UY268_CR8,0,182,268_AL_.jpg`;
};

@@ -71,22 +71,24 @@

/**
 * Collects the episode ratings of every season of a show.
 *
 * The number of seasons is taken from the count of `#bySeason option`
 * elements on the show's episodes page (assumes the dropdown lists one option
 * per season — TODO confirm against the live page). All seasons are then
 * fetched concurrently.
 *
 * @param {string} imdbId - IMDb title id, e.g. "tt0944947".
 * @returns {Promise<Object<number, Array<{episode: number, rating: string}>>|null>}
 *   An object keyed by season number holding that season's ratings. Seasons
 *   with no ratings, or whose fetch failed, are omitted. `null` is returned
 *   when the show page is unavailable, lists no seasons, or an error is thrown.
 */
const getAllRatings = async (imdbId) => {
  try {
    const page = await fetchShowImdbPage(imdbId);
    if (page == null) {
      return null;
    }
    const $ = cheerio.load(page);

    const seasonCount = $('#bySeason option').length;
    if (seasonCount === 0) {
      return null;
    }
    // Season numbers 1..seasonCount.
    const seasons = Array.from({ length: seasonCount }, (_, i) => i + 1);

    const results = await Promise.all(
      seasons.map((season) => getSeasonRatings(imdbId, season))
    );

    return results.reduce((acc, cur) => {
      // getSeasonRatings yields null on failure; skip that season rather than
      // letting one bad request discard every other season's ratings.
      if (cur != null && cur.ratings.length > 0) {
        acc[cur.season] = cur.ratings;
      }
      return acc;
    }, {});
  } catch (err) {
    console.error(err);
    return null;
  }
};

@@ -96,26 +98,27 @@

/**
 * Fetches one season's episode list and reads each episode's rating.
 *
 * Episodes are numbered by their position among the matched rating widgets,
 * starting at 1. Ratings are returned as the text found in the page
 * (strings, not numbers).
 *
 * @param {string} imdbId - IMDb title id, e.g. "tt0944947".
 * @param {number} season - Season number to fetch.
 * @returns {Promise<{season: number, ratings: Array<{episode: number, rating: string}>}|null>}
 *   The season number with its ratings, or `null` when an error is thrown
 *   (the error is logged to the console).
 */
const getSeasonRatings = async (imdbId, season) => {
  try {
    const result = await fetch(
      `https://www.imdb.com/title/${imdbId}/episodes?season=${season}`,
      {
        // Same headers as the other IMDb requests in this file.
        headers: HEADER,
      }
    );
    const resultText = await result.text();
    const $ = cheerio.load(resultText);
    const ratings = $(
      'div.eplist > div > div.info > div.ipl-rating-widget > div.ipl-rating-star'
    )
      .map((index, element) => ({
        episode: index + 1,
        rating: $(element).children('span.ipl-rating-star__rating').text(),
      }))
      .get();
    return { season, ratings };
  } catch (err) {
    console.error(err);
    return null;
  }
};

@@ -126,15 +129,14 @@

/**
 * Downloads the episodes page of a show.
 *
 * @param {string} imdbId - IMDb title id, e.g. "tt0944947".
 * @returns {Promise<string|null>} The response body as text, or `null` when
 *   the request or body read throws (the error is logged to the console).
 */
const fetchShowImdbPage = async (imdbId) => {
  try {
    const result = await fetch(
      `https://www.imdb.com/title/${imdbId}/episodes`,
      {
        headers: HEADER,
      }
    );
    return await result.text();
  } catch (err) {
    console.error(err);
    return null;
  }
};

@@ -146,6 +148,5 @@

/**
 * Searches IMDb for a show and returns the matching TV series.
 *
 * @param {string} show - Show name to search for.
 * @returns {Promise<Array|null>} The entries produced by `getImdbResults`, or
 *   `null` when the search page could not be downloaded.
 */
const getSearchResults = async (show) => {
  const page = await getImdbSearchPage(show);
  if (page == null) return null;
  return getImdbResults(page);
};

@@ -164,6 +165,6 @@

// Public API of the module; each function is listed once.
export default {
  getSearchResults,
  getAllRatings,
  getSeasonRatings,
  getNumSeasons,
};
{
"name": "js-imdb-scraper",
"version": "0.0.10",
"description": "",
"version": "0.1.1",
"description": "Simple IMDB scraper",
"main": "imdbScraper.js",

@@ -9,5 +9,5 @@ "type": "module",

"cheerio": "^1.0.0-rc.3",
"node-fetch": "^2.6.0"
"node-fetch": "^2.6.0",
"rxjs": "^7.5.2"
},
"devDependencies": {},
"scripts": {

@@ -14,0 +14,0 @@ "start": "node imdbScraper.js",

@@ -84,14 +84,17 @@ # js-imdb-scraper

```js
[
{ episode: 1, rating: '8.9' },
{ episode: 2, rating: '8.6' },
{ episode: 3, rating: '8.4' },
{ episode: 4, rating: '8.7' },
{ episode: 5, rating: '8.7' },
{ episode: 6, rating: '8.9' },
{ episode: 7, rating: '9.5' },
{ episode: 8, rating: '8.8' },
{ episode: 9, rating: '9.4' },
{ episode: 10, rating: '9.7' }
]
{
season: 1,
ratings: [
{ episode: 1, rating: '8.9' },
{ episode: 2, rating: '8.6' },
{ episode: 3, rating: '8.4' },
{ episode: 4, rating: '8.7' },
{ episode: 5, rating: '8.7' },
{ episode: 6, rating: '8.9' },
{ episode: 7, rating: '9.5' },
{ episode: 8, rating: '8.8' },
{ episode: 9, rating: '9.4' },
{ episode: 10, rating: '9.7' }
],
}
```

@@ -98,0 +101,0 @@