Socket
Socket
Sign inDemoInstall

kickstarter-crawler

Package Overview
Dependencies
18
Maintainers
1
Versions
30
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.1.3 to 2.0.0

index.test.js

140

index.js

@@ -1,1 +0,139 @@

module.exports.project = require('./lib/project');
const cheerio = require('cheerio');
const axios = require('axios');
module.exports = async function kickstarterCrawler(url) {
try {
let res = await axios.get(url);
if (res.data) {
let data = parseHTML(res);
return data;
}
} catch(err) {
console.error(err);
}
}
/**
 * Extract campaign fields from a fetched Kickstarter project page.
 *
 * @param {Object} res - Response-like object whose `data` property is handed
 *   to `cheerio.load` (presumably the page's HTML string).
 * @returns {Object} Plain object with these keys:
 *   - `title`, `creator`, `description`, `category`, `startdate`, `enddate`:
 *     trimmed strings ('' when the element is not found).
 *   - `location`: the location link text split on ', ' (left unstructured
 *     because US and international locations are formatted differently).
 *   - `duration`: number built from the digits on the second line of the
 *     funding-period text; 0 when that line is missing.
 *   - `funding`, `goal`, `backers`: number built from the digits of the
 *     element text, or '' when the element has no text.
 *   - `pledges`: array of `[amount, backers]` number pairs, one per reward
 *     tier; a part that cannot be read is reported as 0.
 */
function parseHTML(res) {
  // Parse the markup once; `$` is the cheerio query function for this page.
  const $ = cheerio.load(res.data);

  // Digits of `text` as a number; empty text is passed through unchanged so
  // callers can tell "missing" apart from a genuine value.
  const digitsOf = (text) => text && Number(text.replace(/[^0-9]/g, ''));

  const title = $('.project-profile__title').eq(0).text().trim();
  const creator = $('.hero__link').eq(1).text().trim();
  const description = $('.project-profile__blurb').text().trim();

  // Sometimes kickstarter has a "projects we love" entry before
  // category/location, which shifts both links by one position.
  const hasFlag = $('.NS_projects__category_location').children().length === 3;
  const linkOffset = hasFlag ? 1 : 0;
  const categoryLinks = $('.NS_projects__category_location a');
  const location = categoryLinks.eq(linkOffset).text().trim().split(', ');
  const category = categoryLinks.eq(linkOffset + 1).text().trim();

  // The day count is read from the second line of the funding-period text.
  // Guard against single-line text, which previously raised a TypeError.
  const periodLines = $('.NS_campaigns__funding_period .f5').text().trim().split('\n');
  const duration = Number((periodLines[1] || '').replace(/[^0-9]/g, ''));

  const periodTimes = $('.NS_campaigns__funding_period time');
  const startdate = periodTimes.eq(0).text().trim();
  const enddate = periodTimes.eq(1).text().trim();

  const money = $('.money');
  const funding = digitsOf(money.eq(1).text());
  const goal = digitsOf(money.eq(2).text());
  const backers = digitsOf($('h3.mb0').eq(1).text());

  const pledgeAmounts = $('ol li .pledge__amount');
  const pledgeStats = $('ol li .pledge__backer-stats');
  const pledges = [];
  for (let i = 0; i < pledgeStats.length; i++) {
    // First number in the amount text, with any thousands separators. The
    // previous pattern stopped at a zero before the first comma
    // ("$10,500" -> 10), stripped only one comma, and threw when the text
    // held no number at all.
    const amountMatch = pledgeAmounts.eq(i).text().match(/[1-9][0-9,]*/);
    const amount = amountMatch ? Number(amountMatch[0].replace(/,/g, '')) : 0;
    const pledgeBackers = Number(pledgeStats.eq(i).text().replace(/[^0-9]/g, ''));
    pledges.push([amount, pledgeBackers]);
  }

  return {
    title,
    creator,
    description,
    category,
    location,
    duration,
    startdate,
    enddate,
    funding,
    goal,
    backers,
    pledges,
  };
}

42

package.json
{
"name": "kickstarter-crawler",
"version": "0.1.3",
"description": "Crawl kickstarter project data (30 + 8n data points - where n is the # of pledges).",
"version": "2.0.0",
"description": "Kickstarter crawler that does what you think it would",
"main": "index.js",
"homepage": "https://github.com/ghostsnstuff/kickstarter-crawler",
"authors": "Jared Halpert <jaredhalpert90@gmail.com> (https://github.com/ghostsnstuff)",
"keywords": [
"kickstarter",
"crawler",
"crowdfunding",
"data",
"spider",
"bot"
],
"scripts": {
"test": "node test/static-spec && node test/type-spec && node test/queue-spec"
"test": "mocha index.test.js"
},
"repository": {
"type": "git",
"url": "https://github.com/ghostsnstuff/kickstarter-crawler.git"
},
"bugs": {
"url": "https://github.com/ghostsnstuff/kickstarter-crawler/issues"
},
"license": "MIT",
"author": "jared lamont",
"license": "ISC",
"dependencies": {
"request": "*",
"cheerio": "*",
"colors": "*",
"tape": "*",
"crapi": "*",
"is-url": "*"
"axios": "^0.19.2",
"cheerio": "^1.0.0-rc.3"
},
"preferGlobal": "true",
"bin": {
"ks": "./lib/cli.js"
},
"engines": {
"node": "*"
"devDependencies": {
"mocha": "^8.1.0"
}
}

@@ -1,227 +0,34 @@

## kickstarter-crawler
[![Build Status](https://travis-ci.org/ghostsnstuff/kickstarter-crawler.svg?branch=master)](https://travis-ci.org/ghostsnstuff/kickstarter-crawler)
[![NPM](https://nodei.co/npm/kickstarter-crawler.png?downloads=true)](https://nodei.co/npm/kickstarter-crawler/)
##### Returns **30 + 8n** data points - where n is the number of pledges
##### Analyze **61,356** kickstarter projects using [crapi](https://github.com/ghostsnstuff/crapi)
## Installation
npm install kickstarter-crawler -g
## Test
npm test
## Getting Started
[Examples](https://github.com/ghostsnstuff/kickstarter-crawler/blob/master/README.md#examples)<br>
[API](https://github.com/ghostsnstuff/kickstarter-crawler/blob/master/README.md#API)<br>
[The crawler as a service](http://kcaas.io)<br>
### Examples
The following example crawls a project collecting **general**, **funding**, and **location**
related data
# Getting started
```javascript
'use strict';
const kickstarterCrawler = require('kickstarter-crawler');
const url = 'https://www.kickstarter.com/projects/maxtemkin/philosophy-posters';
let result = kickstarterCrawler(url);
result.then((data) => console.log(data));
var kickstarterCrawler = require('kickstarter-crawler');
// output
{
title: 'Philosophy Posters',
creator: 'Max Temkin',
description: 'Ten giant philosophy posters with big ideas presented simply.',
category: 'Graphic Design',
location: [ 'Chicago', 'IL' ],
duration: 30,
startdate: 'Apr 4, 2012',
enddate: 'May 4, 2012',
funding: 41167,
goal: 2000,
backers: 1393,
pledges: [ [ 20, 1003 ], [ 30, 359 ] ]
}
```
# Installation
```
npm i kickstarter-crawler
```
var config, project;
// Project configurations
config = {
url: 'https://www.kickstarter.com/projects/maxtemkin/philosophy-posters',
fields: ['general', 'funding', 'location']
};
// Initialize the crawler
project = new kickstarterCrawler.project(config);
// Make request (crawl)
project.request(function onRequest (err, data) {
// Something broke
if (err) {
console.log(err);
return;
}
// Log crawled data
console.log(data);
});
# Test
```
** :pizza: MORE EXAMPLES COMING :pizza: **
## API
### `kickstarterCrawler.project(config)`
* {Object} project constructor<br>
* Initializes the crawler and exposes its interface<br><br>
### `config`
* {Object} project configurations<br>
* Configurations necessary to instantiate the project constructor<br><br>
### `config.url`
* {String} project profile url<br><br>
### `config.fields`
* {Array} array of project data-fields, which indicate what data points will be crawled<br>
* If *undefined*, data from *all* fields will be returned.<br>
**VALID FIELD VALUES**
* general
* time
* funding
* location
* other
* media
* pledges
### `project.request(callback)`
* Makes a HTTP request to the respective project url<br>
* @param `callback(err, data)` {Function}<br><br>
### `project.parse(HTML)`
* Parses the HTML corresponding to the respective project profile page<br>
* Returns a nested JSON of parsed data
* @param {String} `HTML`<br>
* @return {Object}
### `project.getTitle(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} generalTitle<br><br>
### `project.getCreator(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} generalCreator<br><br>
### `project.getCategory(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} generalCategory<br><br>
### `project.getSubCategory(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} generalSubCategory<br><br>
### `project.getAvatar(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} generalProjectAvatarURL<br><br>
### `project.getProjectUrl(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} generalProjectURL<br><br>
### `project.getCreatorUrl(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} generalCreatorURL<br><br>
### `project.getProjectVideo(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} generalProjectVideoURL<br><br>
### `project.getNumDays(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} timeNumDays<br><br>
### `project.getStartTime(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} timeStart<br><br>
### `project.getEndTime(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} timeEnd<br><br>
### `project.getDollarsRaised(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} fundingDollarsRaised<br><br>
### `project.getFundingGoal(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} fundingGoal<br><br>
### `project.getPercentRaised(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} fundingPercentRaised<br><br>
### `project.getCurrency(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} fundingCurrency<br><br>
### `project.getSuccess(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Boolean} fundingSuccess<br><br>
### `project.getBackers(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} fundingNumBackers<br><br>
### `project.getCity(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} locationCity<br><br>
### `project.getState(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} locationState<br><br>
### `project.getCountry(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} locationCountry<br><br>
### `project.getUpdates(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} otherUpdates<br><br>
### `project.getComments(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} otherComments<br><br>
### `project.getProjectsCreated(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} otherProjectsCreated<br><br>
### `project.getProjectsBacked(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} otherProjectsBacked<br><br>
### `project.getWebsiteUrl(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {String} otherWebsiteURL<br><br>
### `project.getNumImages(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} mediaNumImages<br><br>
### `project.getImages(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Array} mediaImages<br><br>
### `project.getNumPledges(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} pledgesNumPledges<br><br>
### `project.getNumLimitedPledges(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Number} pledgesNumLimitedPledges<br><br>
### `project.getPledgeAmounts(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Array} pledgesAmounts<br><br>
### `project.getPledgesData(callback)`
* @param {Function} `callback(err, data)`<br>
* `data` {Object} will consist of {Object} pledgesData<br><br>
npm run test
```
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc