Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

instagram-screen-scrape

Package Overview
Dependencies
Maintainers
1
Versions
5
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

instagram-screen-scrape - npm Package Compare versions

Comparing version 1.0.1 to 2.0.0

lib/comment.schema.json

6

bin/index.js
#!/usr/bin/env node
// Registering CoffeeScript is best-effort. In production the package may be
// installed without coffee-script, but the CoffeeScript sources are shipped
// already compiled, so a failure here does not matter.
try {
  require('coffee-script/register');
} catch (registrationError) {
  // intentionally ignored (see note above)
}

// Hand control over to the command line interface.
require('../lib/cli');

61

lib/cli.js

@@ -1,29 +0,50 @@

// Generated by CoffeeScript 1.9.3
(function() {
var ArgumentParser, InstagramPosts, JSONStream, argparser, argv, packageInfo;
// Generated by CoffeeScript 1.10.0
var ArgumentParser, InstagramComments, InstagramPosts, JSONStream, argparser, argv, packageInfo, subcommand, subparser;
InstagramPosts = require('./');
InstagramPosts = require('./posts');
packageInfo = require('../package');
InstagramComments = require('./comments');
ArgumentParser = require('argparse').ArgumentParser;
packageInfo = require('../package');
JSONStream = require('JSONStream');
ArgumentParser = require('argparse').ArgumentParser;
argparser = new ArgumentParser({
version: packageInfo.version,
addHelp: true,
description: packageInfo.description
});
JSONStream = require('JSONStream');
argparser.addArgument(['--username', '-u'], {
type: 'string',
help: 'Username of the account to scrape',
required: true
});
argparser = new ArgumentParser({
version: packageInfo.version,
addHelp: true,
description: packageInfo.description
});
argv = argparser.parseArgs();
subparser = argparser.addSubparsers({
dest: 'subcommand'
});
(new InstagramPosts(argv)).pipe(JSONStream.stringify('[', ',\n', ']\n')).pipe(process.stdout);
subcommand = subparser.addParser('comments', {
description: 'Scrape comments for a given post',
addHelp: true
});
}).call(this);
subcommand.addArgument(['-p', '--post'], {
type: 'string',
help: 'Alphanumeric post id to scrape. This is unique across all of Instagram (so the username does not need to be specified when this option is used), and the id can be gotten from Instagram URLs with the format `instagram.com/p/<post id>`.'
});
subcommand = subparser.addParser('posts', {
description: 'Scrape posts by username or post id',
addHelp: true
});
subcommand.addArgument(['-u', '--username'], {
type: 'string',
help: 'Username of the account to scrape.'
});
argv = argparser.parseArgs();
subcommand = argv.subcommand;
delete argv.subcommand;
(subcommand === 'posts' ? new InstagramPosts(argv) : new InstagramComments(argv)).pipe(JSONStream.stringify('[', ',\n', ']\n')).pipe(process.stdout);

@@ -1,139 +0,5 @@

// Generated by CoffeeScript 1.9.3
(function() {
var InstagramPosts, Readable, getPosts, jsonRequest,
// bind(fn, me): returns a wrapper that always invokes `fn` with `me` as
// `this`, forwarding whatever arguments the wrapper receives.
bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; },
// extend(child, parent): copies parent's own enumerable properties onto
// child, gives child a prototype object that inherits from parent.prototype
// (with `constructor` pointing back at child), stores parent.prototype as
// child.__super__, and returns child.
extend = function(child, parent) { for (var key in parent) { if (hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; },
// Cached reference to Object.prototype.hasOwnProperty, used by extend above.
hasProp = {}.hasOwnProperty;
Readable = require('readable-stream').Readable;
jsonRequest = require('./util').jsonRequest;
/**
 * Request one page of an Instagram user's media feed and stream the posts it
 * contains.
 * @param {String} username Account to fetch. The value is percent-encoded
 *   before being placed in the URL path, so characters such as `/`, `?` or `#`
 *   cannot change which endpoint is requested.
 * @param {String} [startingId] Sent as the `max_id` query parameter (the
 *   lowest post id from the previous request), or undefined if this is the
 *   first request.
 * @return {Stream} The stream returned by `jsonRequest` for the `items.*`
 *   selector.
 */
getPosts = function(username, startingId) {
  return jsonRequest('items.*', {
    // encodeURIComponent leaves ordinary usernames untouched and escapes
    // anything that would otherwise be interpreted as URL syntax.
    uri: "https://instagram.com/" + encodeURIComponent(username) + "/media/",
    qs: {
      'max_id': startingId
    }
  });
};
/**
 * Readable object-mode stream that scrapes a user's posts. It requests one
 * page at a time through `getPosts` and ends once a request yields no posts.
 * @param {String} options.username
 * @return {Stream} A stream of post objects.
 */
InstagramPosts = (function(superClass) {
extend(InstagramPosts, superClass);
// Set when `_read` starts a request; cleared in the 'end' handler only if that
// request produced at least one post. While set, `_read` returns immediately.
InstagramPosts.prototype._lock = false;
// `id` of the most recently received raw post; handed to `getPosts` as the
// starting id for the next request. Undefined before the first request.
InstagramPosts.prototype._minPostId = void 0;
function InstagramPosts(arg) {
this.username = arg.username;
// Replace both methods with versions permanently bound to this instance.
this.destroy = bind(this.destroy, this);
this._read = bind(this._read, this);
InstagramPosts.__super__.constructor.call(this, {
highWaterMark: 16,
objectMode: true
});
// Flag read by `_read` and toggled by `destroy` below.
this._readableState.destroyed = false;
}
/**
 * Start a request for the next page of posts and push the results onto the
 * stream. Does nothing if `_lock` is already set.
 */
InstagramPosts.prototype._read = function() {
var hasMorePosts, lastPost;
if (this._lock) {
return;
}
this._lock = true;
// Once destroyed, end the stream instead of issuing another request.
if (this._readableState.destroyed) {
this.push(null);
return;
}
// Becomes true as soon as this request delivers at least one post.
hasMorePosts = false;
// Most recently converted post, held back until the next post or 'end'.
lastPost = void 0;
return getPosts(this.username, this._minPostId).on('error', (function(_this) {
return function(err) {
// Forward request/parse errors to consumers of this stream.
return _this.emit('error', err);
};
})(this)).on('data', (function(_this) {
return function(rawPost) {
var post;
hasMorePosts = true;
// Flatten the raw post into the object shape emitted by this stream.
post = {
id: rawPost.code,
username: _this.username,
// unary plus coerces created_time to a number
time: +rawPost['created_time'],
type: rawPost.type,
like: rawPost.likes.count,
comment: rawPost.comments.count
};
// text, image and video are only set when the raw post carries them.
if (rawPost.caption != null) {
post.text = rawPost.caption.text;
}
if (rawPost.images != null) {
post.image = rawPost.images['standard_resolution'].url;
}
if (rawPost.videos != null) {
post.video = rawPost.videos['standard_resolution'].url;
}
// Remember where the next request should start.
_this._minPostId = rawPost.id;
// Push the previous post and hold this one back, so that the final post of
// the page is pushed from the 'end' handler after `_lock` has been handled.
if (lastPost != null) {
_this.push(lastPost);
}
return lastPost = post;
};
})(this)).on('end', (function(_this) {
return function() {
// Release the lock before the final push.
// NOTE(review): presumably so a `_read` triggered by that push can start the
// next request instead of being rejected by the lock — confirm.
if (hasMorePosts) {
_this._lock = false;
}
if (lastPost != null) {
_this.push(lastPost);
}
// A request that produced no posts ends the stream.
if (!hasMorePosts) {
return _this.push(null);
}
};
})(this));
};
/**
 * Mark the stream as destroyed (only the first call has any effect), run
 * `_destroy`, then emit 'error' if it reported one, followed by 'close'.
 */
InstagramPosts.prototype.destroy = function() {
if (this._readableState.destroyed) {
return;
}
this._readableState.destroyed = true;
return this._destroy((function(_this) {
return function(err) {
if (err) {
_this.emit('error', err);
}
return _this.emit('close');
};
})(this));
};
/**
 * Invokes `cb` asynchronously via `process.nextTick`; performs no other
 * cleanup.
 */
InstagramPosts.prototype._destroy = function(cb) {
return process.nextTick(cb);
};
return InstagramPosts;
})(Readable);
module.exports = InstagramPosts;
}).call(this);
// Generated by CoffeeScript 1.10.0
module.exports = {
InstagramPosts: require('./posts'),
InstagramComments: require('./comments')
};

@@ -12,4 +12,5 @@ {

"time": {
"type": "integer",
"description": "UNIX time at which the post was made"
"description": "UNIX time at which the post was made",
"minimum": 0,
"type": "integer"
},

@@ -23,7 +24,7 @@ "type": {

},
"like": {
"likes": {
"type": "integer",
"minimum": 0
},
"comment": {
"comments": {
"type": "integer",

@@ -35,9 +36,5 @@ "minimum": 0

},
"image": {
"media": {
"type": "string",
"format": "uri"
},
"video": {
"type": "string",
"format": "uri"
}

@@ -53,4 +50,5 @@ },

"comment",
"text"
"text",
"media"
]
}

@@ -1,36 +0,33 @@

// Generated by CoffeeScript 1.9.3
(function() {
var JSONStream, jsonRequest, request, zlib;
// Generated by CoffeeScript 1.10.0
var JSONStream, jsonRequest, request, zlib;
request = require('request');
request = require('request');
JSONStream = require('JSONStream');
JSONStream = require('JSONStream');
zlib = require('zlib');
zlib = require('zlib');
jsonRequest = function(jsonSelector, options) {
var outStream;
outStream = JSONStream.parse(jsonSelector);
options.gzip = true;
request(options).on('response', function(response) {
var encoding, gunzip, ref;
if (response.statusCode === 200) {
encoding = (ref = response.headers['content-encoding']) != null ? ref.trim().toLowerCase() : void 0;
if (encoding === 'gzip') {
gunzip = zlib.createGunzip();
return response.pipe(gunzip).pipe(outStream);
} else {
return response.pipe(outStream);
}
jsonRequest = function(jsonSelector, options) {
var outStream;
outStream = JSONStream.parse(jsonSelector);
options.gzip = true;
request(options).on('response', function(response) {
var encoding, gunzip, ref;
if (response.statusCode === 200) {
encoding = (ref = response.headers['content-encoding']) != null ? ref.trim().toLowerCase() : void 0;
if (encoding === 'gzip') {
gunzip = zlib.createGunzip();
return response.pipe(gunzip).pipe(outStream);
} else {
throw new Error("Instagram returned status code: " + response.statusCode);
return response.pipe(outStream);
}
});
return outStream;
};
} else {
return outStream.emit('error', "Instagram returned status code: " + response.statusCode);
}
});
return outStream;
};
module.exports = {
jsonRequest: jsonRequest
};
}).call(this);
module.exports = {
jsonRequest: jsonRequest
};
{
"name": "instagram-screen-scrape",
"description": "scrape public instagram data w/out API access",
"version": "1.0.1",
"version": "2.0.0",
"author": "Sean Lang <slang800@gmail.com>",

@@ -14,12 +14,13 @@ "bin": {

"JSONStream": "^0.10.0",
"argparse": "^1.0.2",
"readable-stream": "^1.0.33",
"request": "^2.55.0"
"argparse": "^1.0.7",
"readable-stream": "^2.0.4",
"request": "^2.65.0",
"tough-cookie": "^2.2.1"
},
"devDependencies": {
"coffee-script": "^1.9.1",
"coffee-script": "^1.10.0",
"isstream": "^0.1.2",
"json-schema": "^0.2.2",
"mocha": "^2.2.4",
"should": "^5.2.0"
"mocha": "^2.3.4",
"should": "^7.1.1"
},

@@ -26,0 +27,0 @@ "homepage": "https://github.com/slang800/instagram-screen-scrape",

# Instagram Screen Scrape
[![Build Status](http://img.shields.io/travis/slang800/instagram-screen-scrape.svg?style=flat-square)](https://travis-ci.org/slang800/instagram-screen-scrape) [![NPM version](http://img.shields.io/npm/v/instagram-screen-scrape.svg?style=flat-square)](https://www.npmjs.org/package/instagram-screen-scrape) [![NPM license](http://img.shields.io/npm/l/instagram-screen-scrape.svg?style=flat-square)](https://www.npmjs.org/package/instagram-screen-scrape)
A tool for scraping public data from Instagram, without needing to get permission from Instagram. It can (theoretically) scrape anything that a non-logged-in user can see. But, right now it only supports getting posts for a given username.
A tool for scraping public data from Instagram, without needing to get permission from Instagram. It can (theoretically) scrape anything that a non-logged-in user can see. However, it currently supports only getting posts for a given username or comments for a given post.

@@ -11,12 +11,22 @@ ## Example

```bash
$ instagram-screen-scrape --username carrotcreative
[{"id":"0toxcII4Eo","username":"carrotcreative","time":1427420497,"type":"image","like":82,"comment":3,"text":"Our CTO, @kylemac, speaking on the #LetsTalkCulture panel tonight @paperlesspost.","image":"https://scontent.cdninstagram.com/hphotos-xaf1/t51.2885-15/e15/11055816_398297847022038_803876945_n.jpg"},
{"id":"0qPcnuI4Pr","username":"carrotcreative","time":1427306556,"type":"image","like":80,"comment":4,"text":"#bitchesbebakin took it to another level today for @nporteschaikin and @slang800's #Carrotversaries today.","image":"https://scontent.cdninstagram.com/hphotos-xaf1/t51.2885-15/e15/10959049_1546104325652055_1320782099_n.jpg"},
{"id":"0WLnjlo4Ft","username":"carrotcreative","time":1426633460,"type":"image","like":61,"comment":1,"text":"T-shirts speak louder than words. Come find us @sxsw.","image":"https://scontent.cdninstagram.com/hphotos-xfa1/t51.2885-15/e15/11032904_789885121108568_378908081_n.jpg"},
$ instagram-screen-scrape posts --username carrotcreative
[{"id":"0toxcII4Eo","username":"carrotcreative","time":1427420497,"type":"image","likes":82,"comments":3,"text":"Our CTO, @kylemac, speaking on the #LetsTalkCulture panel tonight @paperlesspost.","media":"https://scontent.cdninstagram.com/hphotos-xaf1/t51.2885-15/e15/11055816_398297847022038_803876945_n.jpg"},
{"id":"0qPcnuI4Pr","username":"carrotcreative","time":1427306556,"type":"image","likes":80,"comments":4,"text":"#bitchesbebakin took it to another level today for @nporteschaikin and @slang800's #Carrotversaries today.","media":"https://scontent.cdninstagram.com/hphotos-xaf1/t51.2885-15/e15/10959049_1546104325652055_1320782099_n.jpg"},
{"id":"0WLnjlo4Ft","username":"carrotcreative","time":1426633460,"type":"image","likes":61,"comments":1,"text":"T-shirts speak louder than words. Come find us @sxsw.","media":"https://scontent.cdninstagram.com/hphotos-xfa1/t51.2885-15/e15/11032904_789885121108568_378908081_n.jpg"},
```
We can also scrape comments:
```bash
$ instagram-screen-scrape comments --post 0qPcnuI4Pr
[{"id":"948651188581269518","username":"johnlustina","time":1427308055,"text":"@margeauxlustina"},
{"id":"948682633420963943","username":"rita_xo","time":1427311804,"text":"👌@emilykalen"},
{"id":"948734454231433861","username":"david_berkhin","time":1427317981,"text":"looks so good!"},
{"id":"948824521079751272","username":"k.kate","time":1427328718,"text":"Macarons or a Petri dish full of cells? ¯\\_(ツ)_/¯"}]
```
By default, there is 1 line per post, making it easy to pipe into other tools. The following example uses `wc -l` to count how many posts are returned. As you can see, I don't post much.
```bash
$ instagram-screen-scrape -u slang800 | wc -l
$ instagram-screen-scrape posts -u slang800 | wc -l
2

@@ -29,3 +39,3 @@ ```

```coffee
InstagramPosts = require 'instagram-screen-scrape'
{InstagramPosts} = require 'instagram-screen-scrape'

@@ -36,6 +46,5 @@ # create the stream

# do something interesting with the stream
streamOfPosts.on('readable', ->
streamOfPosts.on('data', (post) ->
# since it's an object-mode stream, we get objects from it and don't need to
# parse JSON or anything.
post = streamOfPosts.read()
# parse JSON or anything

@@ -48,3 +57,3 @@ # the time field is represented in UNIX time

console.log "slang800's post from #{time.toLocaleDateString()} got
#{post.like} like(s), and #{post.comment} comment(s)"
#{post.likes} like(s), and #{post.comments} comment(s)"
)

@@ -57,3 +66,3 @@ ```

var InstagramPosts, streamOfPosts;
InstagramPosts = require('instagram-screen-scrape');
InstagramPosts = require('instagram-screen-scrape').InstagramPosts;

@@ -64,6 +73,4 @@ streamOfPosts = new InstagramPosts({

streamOfPosts.on('readable', function() {
var post, time;
post = streamOfPosts.read();
time = new Date(post.time * 1000);
streamOfPosts.on('data', function(post) {
var time = new Date(post.time * 1000);
console.log([

@@ -73,5 +80,5 @@ "slang800's post from ",

" got ",
post.like,
post.likes,
" like(s), and ",
post.comment,
post.comments,
" comment(s)"

@@ -82,2 +89,19 @@ ].join(''));

And we can scrape comments in a similar manner (shown in CoffeeScript):
```coffee
{InstagramComments} = require 'instagram-screen-scrape'
streamOfComments = new InstagramComments(post: '0qPcnuI4Pr')
# do something interesting with the stream
streamOfComments.on('data', (comment) ->
# the time field is represented in UNIX time
time = new Date(comment.time * 1000)
console.log "#{comment.username} commented on #{time.toLocaleDateString()}:
#{comment.text}"
)
```
## Why?

@@ -84,0 +108,0 @@ The fact that Instagram requires an app to be registered just to access the data that is publicly available on their site is excessively controlling. Scripts should be able to consume the same data as people, and with the same level of authentication. Sadly, Instagram doesn't provide an open, structured, and machine-readable API.

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc