impurge - npm Package Compare versions

Comparing version 0.0.5 to 0.0.6

190

impurge.js

		@@ -1,4 +0,4 @@
		var cheerio = require("cheerio")
		, request = require("request")
		var impurge = [];
		var $ = require("cheerio"),
		request = require("request")
		var impurge = [];

		@@ -8,94 +8,106 @@ module.exports = impurge;
		//pattern used for extraction of the links from the html
		var imgur_url_pattern = RegExp("^http://((www)\|(i)\.)?imgur.com/[./a-zA-Z0-9&,]+","ig");
		var imgur_url_pattern = RegExp("^http://((www)\|(i)\.)?imgur.com/[./a-zA-Z0-9&,]+", "ig");

		//patterns used to check URL patterns
		var imgur_album_url_pattern = RegExp("^http://(?:www\.)?imgur\.com/a/([a-zA-Z0-9]+)","i");
		var imgur_gallery_url_pattern = RegExp("^http://(?:www\.)?imgur\.com/gallery/([a-zA-Z0-9]+)","i");
		var imgur_hashes_pattern = RegExp("imgur\.com/(([a-zA-Z0-9]{5,7}[&,]?)+)","i");
		var imgur_image_pattern = RegExp("^http://(www\.)?(i\.)?imgur\.com/.{3,7}\.((jpg)\|(gif))","ig");
		// Regex literals are used so that "\." really matches a literal dot; inside a
		// quoted string the backslash is dropped and "." would match any character.

		// Album links: capture group 1 is the album id.
		var imgur_album_url_pattern = /^http:\/\/(?:www\.)?imgur\.com\/a\/([a-zA-Z0-9]+)/i;
		// Gallery links: capture group 1 is the gallery id.
		var imgur_gallery_url_pattern = /^http:\/\/(?:www\.)?imgur\.com\/gallery\/([a-zA-Z0-9]+)/i;
		// Hash links: capture group 1 is one or more 5-7 character hashes, optionally
		// separated by "&" or ",".
		var imgur_hashes_pattern = /imgur\.com\/(([a-zA-Z0-9]{5,7}[&,]?)+)/i;
		// Direct image links (jpg/gif/png). Deliberately NOT global: with the "g" flag
		// exec() resumes from lastIndex, so a second exec() on the same URL fails.
		var imgur_image_pattern = /^http:\/\/(www\.)?(i\.)?imgur\.com\/.{3,7}\.((jpg)|(gif)|(png))/i;

		/**
		 * Collect every imgur link that appears in a piece of text.
		 *
		 * @param {string} text - arbitrary text to scan
		 * @returns {string[]} the matched URLs in order of appearance; empty when
		 *   the text contains none
		 */
		impurge.get_text_imgur_links = function(text) {
			// Built per call: a global regex keeps its position in lastIndex, so a
			// shared instance would start scanning mid-string on the next call.
			var imgur_url_pattern = /https?:\/\/(?:(?:m|www|i)\.)?imgur\.com\/(?:a\/)?[a-zA-Z0-9&]+(?:\.(?:jpg|gif|png))?/gi;
			var urls = [];
			var matches;
			// exec() is called only inside the loop condition; an extra call before
			// the loop would consume (and lose) the first link.
			while ((matches = imgur_url_pattern.exec(text)) !== null) {
				urls.push(matches[0]);
			}
			return urls;
		};

		/**
		 * Tell whether a URL contains an imgur link.
		 *
		 * The pattern is unanchored, so an imgur link embedded later in the string
		 * also counts.
		 *
		 * @param {string} url - URL (or text) to check
		 * @returns {boolean} true when an imgur link is found, false otherwise
		 */
		impurge.is_imgur = function(url) {
			// Regex literal so the dots in "imgur.com" and the extensions are literal.
			var imgur_url_pattern = /https?:\/\/(?:(?:m|www|i)\.)?imgur\.com\/(?:a\/)?[a-zA-Z0-9&]+(?:\.(?:jpg|gif|png))?/i;
			return imgur_url_pattern.test(url);
		};

		// Classifies the URL handed to the module and reports the result through
		// callback:
		//   callback(null, 'image_url', null, url)  - direct image link
		//   callback(null, 'album_url' | 'gallery_url' | 'hash_url', hashes)
		//   callback('unidentified_type')           - no pattern matched
		// hashes is capture group 1 of the matching pattern split on "," and "&".
		// NOTE(review): imgur_image_pattern is created with the "g" flag, so its
		// exec() resumes from lastIndex left by the previous successful call.
		var determine_link_type = function (url, callback) {
		// Direct image link: the URL itself is passed back as the 4th argument.
		if ( imgur_image_pattern.exec(url) ) {
		callback(null,'image_url',null,url);
		}
		// Album link; the pattern is executed a second time to obtain the capture.
		else if ( imgur_album_url_pattern.exec(url) ) {
		var match = imgur_album_url_pattern.exec(url);
		if (match){
		var hashes = match[1].split(/[,&]/)
		}
		callback(null,'album_url',hashes);
		}
		// Gallery link; same double-exec shape as the album branch.
		else if ( imgur_gallery_url_pattern.exec(url) ) {
		var match = imgur_gallery_url_pattern.exec(url);
		if (match){
		var hashes = match[1].split(/[,&]/);
		}
		callback(null,'gallery_url',hashes);
		}
		// Hash link: checked last, after the album and gallery patterns.
		else if ( imgur_hashes_pattern.exec(url) ) {
		var match = imgur_hashes_pattern.exec(url);
		if (match){
		var hashes = match[1].split(/[,&]/);
		}
		callback(null,'hash_url',hashes);
		}
		// Nothing matched: the only argument is the error string.
		else {
		callback('unidentified_type');
		}
		/**
		 * Classify an imgur URL and report the result through a Node-style callback.
		 *
		 * @param {string} url - URL to classify
		 * @param {function} callback - invoked exactly once as
		 *   callback(null, 'image_url', null, url) for a direct image link,
		 *   callback(null, 'album_url' | 'gallery_url' | 'hash_url', hashes) where
		 *   hashes is capture group 1 of the matching pattern split on "," and "&",
		 *   or callback('unidentified_type') when no pattern matches.
		 */
		impurge.determine_link_type = function(url, callback) {
			// imgur_image_pattern may carry the "g" flag, in which case matching
			// resumes from lastIndex; rewind before and after so that repeated calls
			// with the same URL are classified the same way.
			imgur_image_pattern.lastIndex = 0;
			var is_image = imgur_image_pattern.test(url);
			imgur_image_pattern.lastIndex = 0;
			if (is_image) {
				callback(null, 'image_url', null, url);
				return;
			}

			// Order matters: a /gallery/ URL also satisfies the looser hash pattern,
			// so the hash pattern has to be tried last.
			var typed_patterns = [
				['album_url', imgur_album_url_pattern],
				['gallery_url', imgur_gallery_url_pattern],
				['hash_url', imgur_hashes_pattern]
			];
			for (var i = 0; i < typed_patterns.length; i++) {
				// One exec() per pattern serves as both the test and the capture.
				var match = typed_patterns[i][1].exec(url);
				if (match) {
					callback(null, typed_patterns[i][0], match[1].split(/[,&]/));
					return;
				}
			}

			callback('unidentified_type');
		};

		/**
		 * Resolve a supported imgur URL to the direct image URLs behind it.
		 *
		 * @param {string} url - imgur image, hash, album or gallery URL
		 * @param {function} callback - invoked exactly once, either as
		 *   callback(error) or as callback(null, links) where links is an array of
		 *   image URLs
		 */
		impurge.purge = function (url, callback) {
			determine_link_type(url, function (error, type, id, image_url) {
				if (error) {
					callback(error);
					return;
				}
				if (type === 'image_url') {
					// Already a direct link; no API request is needed.
					callback(null, [image_url]);
					return;
				}

				var api_url;
				if (type === 'album_url' || type === 'gallery_url') {
					api_url = 'http://api.imgur.com/2/album/' + id + ".json";
				} else if (type === 'hash_url') {
					api_url = 'http://api.imgur.com/2/image/' + id + ".json";
				} else {
					// Return here so an unknown type does not go on to issue a request
					// and possibly call back a second time.
					callback("unknown_link_error");
					return;
				}

				request(api_url, function (err, res, body) {
					if (err) {
						// Report the transport failure itself rather than a misleading
						// JSON error caused by the missing body.
						callback(err);
						return;
					}

					var api_json;
					try {
						api_json = JSON.parse(body);
					} catch (parse_error) {
						callback("impurge: JSON parsing error w/following URL: " + api_url);
						return;
					}

					var links = [];
					if (api_json && api_json.image && api_json.image.links) {
						links.push(api_json.image.links.original);
					} else if (api_json && api_json.album) {
						var images_json = api_json.album.images || {};
						Object.keys(images_json).forEach(function (key) {
							var entry = images_json[key];
							if (entry && entry.links) {
								links.push(entry.links.original);
							}
						});
					} else {
						// Without this branch the caller would wait forever.
						callback("impurge: unexpected API response for URL: " + api_url);
						return;
					}
					callback(null, links);
				});
			});
		};
		/**
		 * Resolve a supported imgur URL to the direct image URLs behind it.
		 *
		 * @param {string} url - imgur image, hash, album or gallery URL
		 * @param {function} callback - invoked exactly once, either as
		 *   callback(error) or as callback(null, links) where links is an array of
		 *   image URLs
		 */
		impurge.purge = function(url, callback) {
			impurge.determine_link_type(url, function(error, type, id, image_url) {
				if (error) {
					callback(error);
					return;
				}
				if (type === 'image_url') {
					// Already a direct link; no API request is needed.
					callback(null, [image_url]);
					return;
				}

				var api_url;
				if (type === 'album_url' || type === 'gallery_url') {
					api_url = 'http://api.imgur.com/2/album/' + id + ".json";
				} else if (type === 'hash_url') {
					api_url = 'http://api.imgur.com/2/image/' + id + ".json";
				} else {
					// Return here so an unknown type does not go on to issue a request
					// and possibly call back a second time.
					callback("unknown_link_error");
					return;
				}

				request(api_url, function(err, res, body) {
					if (err) {
						// Report the transport failure itself rather than a misleading
						// JSON error caused by the missing body.
						callback(err);
						return;
					}

					var api_json;
					try {
						api_json = JSON.parse(body);
					} catch (parse_error) {
						callback("impurge: JSON parsing error w/following URL: " + api_url);
						return;
					}

					var links = [];
					if (api_json && api_json.image && api_json.image.links) {
						links.push(api_json.image.links.original);
					} else if (api_json && api_json.album) {
						var images_json = api_json.album.images || {};
						Object.keys(images_json).forEach(function(key) {
							var entry = images_json[key];
							if (entry && entry.links) {
								links.push(entry.links.original);
							}
						});
					} else {
						// Without this branch the caller would wait forever.
						callback("impurge: unexpected API response for URL: " + api_url);
						return;
					}
					callback(null, links);
				});
			});
		};

package.json

		{
		"name": "impurge",
		"main": "impurge.js",
		"subdomain": "impurge",
		"scripts": {
		"start": "impurge.js",
		"test": "mocha -t 4000 test/test.js"
		},
		"version": "0.0.5",
		"description": "takes any imgur url and returns an array of direct image files",
		"engines": {
		"node": "0.8.x"
		},
		"dependencies": {
		"mocha": "*",
		"request": "~2.11.4",
		"cheerio": "*"
		},
		"name": "impurge",
		"main": "impurge.js",
		"subdomain": "impurge",
		"scripts": {
		"start": "impurge.js",
		"test": "node test/test.js"
		},
		"version": "0.0.6",
		"description": "takes any imgur url and returns an array of direct image files",
		"engines": {
		"node": "0.8.x"
		},
		"dependencies": {
		"mocha": "*",
		"request": "~2.11.4",
		"cheerio": "*"
		},
		"devDependencies": {
		"should": "~1.2.1"
		},
		"keywords": [
		"imgur",
		"parser",
		"image"
		],
		"author": "hortinstein",
		"license": "BSD",
		"readmeFilename": "README.md"
		}
		"should": "~1.2.1"
		},
		"keywords": [
		"imgur",
		"parser",
		"image"
		],
		"author": "hortinstein",
		"license": "BSD",
		"readmeFilename": "README.md"
		}

README.md

		@@ -1,15 +0,41 @@
		impurge
		#impurge

		This is a simple module meant to extract image URLS from imgur

		<pre><code>var impurge = require('impurge');

		Example:
		```js
		var impurge = require('impurge');
		impurge.purge("http://imgur.com/IvpcP", function (e,r) {
		console.log(r)
		});
		</code></pre>
		```

		will give you
		will result in:

		<code>http://i.imgur.com/IvpcP.jpg
		```js
		http://i.imgur.com/IvpcP.jpg
		```

		This works for the three types of Imgur links I have encountered, and will return all image links for albums as well.
		The included test file checks the regular expressions against live reddit data, to detect changes in imgur's link formats and to report any link it does not recognize.


		I have added a few additional methods that I used for testing the accuracy of my regexes:
		```js
		impurge.is_imgur(url); //will output true if the url is imgur
		```

		The following takes a string and returns an array of the imgur links contained within it:
		```js
		impurge.get_text_imgur_links(text); // will return an array of links
		```

		Finally, I exposed a function to determine the link type (previously used internally):
		```js
		impurge.determine_link_type(url, function(err, type, id, i_url){
		//err is given if link is not recognized
		//type is image_url, album_url, gallery_url, hash_url
		//id is the id for the link (if applicable)
		//i_url is the image url if this is a direct image
		});
		```

104

test/test.js

		@@ -1,54 +0,58 @@
		var should = require('should');
		var request = require('request');
		var stream = require('stream');

		var impurge = require('../impurge.js');

		// Live-network suite: every case resolves a real imgur URL through
		// impurge.purge and compares the returned links with known values.
		// Each callback hands a purge error to done() first; otherwise a failed
		// lookup would show up as a TypeError on the undefined result `r`.
		describe('impurge', function(){
			describe('image url', function () {
				it('should find 1 picture', function(done){
					impurge.purge("http://i.imgur.com/AXvN0Mq.png", function (e,r) {
						if (e) return done(e);
						r.length.should.equal(1)
						r[0].should.equal('http://i.imgur.com/AXvN0Mq.png');

						done();
					})
				});
			});
			describe('image hash url', function () {
				it('should find 1 picture', function(done){
					impurge.purge("http://imgur.com/WWm8Cl6", function (e,r) {
						if (e) return done(e);
						r.length.should.equal(1);
						r[0].should.equal('http://i.imgur.com/WWm8Cl6.jpg');
						done();
					})
				});
			});
			describe('album url', function () {
				it('should find 2 pictures', function(done){
					impurge.purge("http://imgur.com/a/9uIQf", function (e,r) {
						if (e) return done(e);
						r.length.should.equal(2);
						r[0].should.equal('http://i.imgur.com/Z6ft3xZ.jpg');
						r[1].should.equal('http://i.imgur.com/xSW842C.jpg');
						done();
					})
				});
			});
			describe('image hash url', function () {
				it('should find 1 picture', function(done){
					impurge.purge("http://imgur.com/SKiDPaz", function (e,r) {
						if (e) return done(e);
						r.length.should.equal(1);
						r[0].should.equal('http://i.imgur.com/SKiDPaz.jpg');
						done();
					})
				});
			});
			describe('gallery hash url', function () {
				it('should find 4 pictures', function(done){
					impurge.purge("http://imgur.com/gallery/Ptn4M", function (e,r) {
						if (e) return done(e);
						r.length.should.equal(4);
						r[0].should.equal('http://i.imgur.com/tYx0Ebf.gif');
						done();
					})
				});
			});
		});
		//This function tests impurge by getting 100 gonewild posts

		/**
		 * Request the subreddit listing below and re-emit the URL of each post.
		 *
		 * @returns {stream.Readable} object-mode stream used as an event emitter:
		 *   'url' (string) fires once per listing child whose kind is "t3";
		 *   'error' fires when the request fails or the response has no
		 *   data.children array.
		 */
		function getImgurPosts() {
			var readStream = new stream.Readable({
				objectMode: true
			});
			// URLs are delivered through 'url' events rather than pushed chunks, so
			// there is nothing to do when a consumer asks for data.
			readStream._read = function(n) {};

			request({
				url: 'http://www.reddit.com/r/gonewild.json?limit=100&after=',
				json: true
			}, function(err, res, obj) {
				// Failures arrive here asynchronously; a try/catch around request()
				// could never observe them.
				if (err) {
					readStream.emit('error', err);
					return;
				}
				var children = obj && obj.data && obj.data.children;
				if (!Array.isArray(children)) {
					readStream.emit('error', new Error('unexpected listing response'));
					return;
				}
				children.forEach(function(item) {
					if (item.kind === 't3') readStream.emit('url', item.data.url);
				});
			});

			return readStream;
		}

		//need to look into converting this to the transform type in blog post below
		//http://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/
		// Live check of the URL patterns: every URL emitted by getImgurPosts() is
		// run through is_imgur, and imgur URLs are then classified. A non-imgur URL
		// is only logged; an imgur URL that cannot be classified is logged and
		// raised as an error.
		getImgurPosts().on('url', function(postUrl) {
			if (impurge.is_imgur(postUrl)) {
				impurge.determine_link_type(postUrl, function(linkError) {
					if (!linkError) {
						return;
					}
					console.log('e' + linkError, postUrl);
					throw new Error('unknown imgur link type: ' + postUrl);
				});
				return;
			}
			console.log('eNOT IMGUR', postUrl);
		});

		//this is a test for text it should return 5 links
		// var testTextAlbum = 'http://imgur.com/a/o7AVs is a album_url with id: [ o7AVs ] and url undefined ';
		// var testTextImage = 'http://i.imgur.com/4aCgHc7.jpg is a image_url with id: null and url http://i.imgur.com/4aCgHc7.jpg ';
		// var testTextHash = 'http://i.imgur.com/oM1mFEd.jpg is a hash_url with id: [ oM1mFEd] and url undefined ';
		// var testTextAlbum2 = 'http://imgur.com/a/h4gt1 is a album_url with id: [ h4gt1 ] and url undefined';
		// var testText = testTextAlbum + testTextImage + testTextHash + testTextAlbum2;
		// console.log(testText);
		// var comment_links = impurge.get_text_imgur_links(testText);

impurge - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics