Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

impurge

Package Overview
Dependencies
Maintainers
1
Versions
16
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

impurge - npm Package Compare versions

Comparing version 0.0.5 to 0.0.6

190

impurge.js

@@ -1,4 +0,4 @@

var cheerio = require("cheerio")
, request = require("request")
var impurge = [];
var $ = require("cheerio"),
request = require("request")
var impurge = [];

@@ -8,94 +8,106 @@ module.exports = impurge;

//Module-level patterns used to recognise and classify imgur URLs.
//
//They are written as regex literals on purpose: inside a string passed to
//RegExp(), "\." collapses to "." and the dot silently becomes a wildcard
//(so "imgurXcom" used to match).
//Each pattern is declared exactly once, and both http and https are accepted.

//pattern used for extraction of the links from the html
//(the "www" alternative now includes its dot so http://www.imgur.com/... matches)
var imgur_url_pattern = /^https?:\/\/((www\.)|(i\.))?imgur\.com\/[.\/a-zA-Z0-9&,]+/ig;

//patterns used to check URL patterns
//capture group 1 is the album id
var imgur_album_url_pattern = /^https?:\/\/(?:www\.)?imgur\.com\/a\/([a-zA-Z0-9]+)/i;
//capture group 1 is the gallery id
var imgur_gallery_url_pattern = /^https?:\/\/(?:www\.)?imgur\.com\/gallery\/([a-zA-Z0-9]+)/i;
//capture group 1 is the list of 5-7 character hashes, separated by "," or "&"
var imgur_hashes_pattern = /imgur\.com\/(([a-zA-Z0-9]{5,7}[&,]?)+)/i;
//direct image link; deliberately NOT global: a "g" regex keeps lastIndex
//between exec() calls, which made every second identical lookup fail
var imgur_image_pattern = /^https?:\/\/(www\.)?(i\.)?imgur\.com\/.{3,7}\.((jpg)|(gif)|(png))/i;
//Finds every imgur link contained in a piece of text.
//  text: the string to scan (null/undefined is treated as "no text")
//  returns: an array with each matched link, in order of appearance;
//           an empty array when nothing matches
impurge.get_text_imgur_links = function(text) {
    if (text === null || text === undefined) {
        return [];
    }
    //regex literal so the dots are real dots; built per call, so the
    //global flag's lastIndex state can never leak between calls
    var imgur_url_pattern = /https?:\/\/(?:m\.|www\.|i\.)?imgur\.com\/(?:a\/)?[a-zA-Z0-9&]+(?:\.jpg|\.gif|\.png)?/igm;
    //match() with a global regex yields all full matches at once; the old
    //exec() loop was primed with an extra exec() that discarded the first link
    var urls = String(text).match(imgur_url_pattern);
    return urls === null ? [] : urls;
};
//Tells whether a string contains something that looks like an imgur link.
//  url: the string to check
//  returns: true when an imgur link is found, false otherwise
//           (previously the negative case returned undefined)
impurge.is_imgur = function(url) {
    //regex literal: in the old RegExp("...") string form "\." lost its
    //backslash, so "imgur.com" also matched e.g. "imgurXcom"
    var imgur_url_pattern = /https?:\/\/(?:m\.|www\.|i\.)?imgur\.com\/(?:a\/)?[a-zA-Z0-9&]+(?:\.jpg|\.gif|\.png)?/i;
    return imgur_url_pattern.test(url);
};
//Determines what kind of imgur link was provided to the module.
//The url is tested against the module-level patterns in a fixed order
//(image, album, gallery, hashes) and the first one that matches wins.
//The result is reported through callback(error, type, hashes, url):
//  - 'image_url'   : hashes is null and the 4th argument is the url itself
//  - 'album_url'   : hashes is capture group 1 split on "," or "&"
//  - 'gallery_url' : same shape as album_url
//  - 'hash_url'    : same shape as album_url
//  - no match      : callback('unidentified_type') with no further arguments
var determine_link_type = function (url, callback) {
if ( imgur_image_pattern.exec(url) ) {
callback(null,'image_url',null,url);
}
else if ( imgur_album_url_pattern.exec(url) ) {
//the pattern is executed a second time to obtain the capture groups
var match = imgur_album_url_pattern.exec(url);
if (match){
var hashes = match[1].split(/[,&]/)
}
//hashes stays undefined if the second exec did not match
callback(null,'album_url',hashes);
}
else if ( imgur_gallery_url_pattern.exec(url) ) {
var match = imgur_gallery_url_pattern.exec(url);
if (match){
var hashes = match[1].split(/[,&]/);
}
callback(null,'gallery_url',hashes);
}
else if ( imgur_hashes_pattern.exec(url) ) {
var match = imgur_hashes_pattern.exec(url);
if (match){
var hashes = match[1].split(/[,&]/);
}
callback(null,'hash_url',hashes);
}
else {
//none of the known link shapes matched
callback('unidentified_type');
}
//Classifies an imgur url and reports the result through
//callback(error, type, hashes, url):
//  - 'image_url'   : direct image link; hashes is null, 4th argument is the url
//  - 'album_url'   : hashes is the captured id(s), split on "," or "&"
//  - 'gallery_url' : same shape as album_url
//  - 'hash_url'    : same shape as album_url
//  - no match      : callback('unidentified_type')
//The patterns are tried in that order and the callback fires exactly once.
impurge.determine_link_type = function(url, callback) {
    //the image pattern may carry the "g" flag, which makes exec()/test()
    //resume from lastIndex; reset it around the check so that repeated
    //calls with the same url always give the same answer
    imgur_image_pattern.lastIndex = 0;
    var is_image = imgur_image_pattern.test(url);
    imgur_image_pattern.lastIndex = 0;
    if (is_image) {
        callback(null, 'image_url', null, url);
        return;
    }

    var typed_patterns = [
        ['album_url', imgur_album_url_pattern],
        ['gallery_url', imgur_gallery_url_pattern],
        ['hash_url', imgur_hashes_pattern]
    ];
    for (var i = 0; i < typed_patterns.length; i++) {
        //run each pattern once and reuse the match (it used to run twice)
        var match = typed_patterns[i][1].exec(url);
        if (match) {
            //a trailing "," or "&" would otherwise produce an empty id
            var hashes = match[1].split(/[,&]/).filter(function(hash) {
                return hash !== '';
            });
            callback(null, typed_patterns[i][0], hashes);
            return;
        }
    }

    callback('unidentified_type');
};
//Resolves any imgur url to an array of direct image links.
//  url: the imgur link (direct image, album, gallery or hash link)
//  callback(error, links): called exactly once; links is an array of urls
//Direct image links are returned as-is; every other type is looked up
//through the imgur JSON api.
impurge.purge = function (url, callback) {
    determine_link_type(url, function (error, type, id, image_url) {
        if (error) {
            callback(error);
            return;
        }
        if (type === 'image_url') {
            callback(null, [image_url]);
            return;
        }

        var api_url;
        if (type === 'album_url' || type === 'gallery_url') {
            api_url = 'http://api.imgur.com/2/album/' + id + ".json";
        } else if (type === 'hash_url') {
            api_url = 'http://api.imgur.com/2/image/' + id + ".json";
        } else {
            //stop here: this used to fall through and request an undefined url
            callback("unknown_link_error");
            return;
        }

        request(api_url, function (err, res, body) {
            if (err) {
                //transport failures used to be reported as a JSON parsing error
                callback(err);
                return;
            }
            var api_json;
            try {
                api_json = JSON.parse(body);
            } catch (parse_error) {
                callback("impurge: JSON parsing error w/following URL: " + api_url);
                return;
            }

            var links = [];
            var found = false;
            if (api_json && api_json['image']) {
                found = true;
                links.push(api_json['image']['links']['original']);
            }
            if (api_json && api_json['album']) {
                found = true;
                var images_json = api_json['album']['images'];
                //for-in kept from the original; presumably images is an
                //array, but this also works if the api returns an object
                for (var image in images_json) {
                    links.push(images_json[image]['links']['original']);
                }
            }
            if (!found) {
                //previously the callback was never invoked in this case
                callback("impurge: no image or album data returned for URL: " + api_url);
                return;
            }
            callback(null, links);
        });
    });
};
//Resolves any imgur url to an array of direct image links.
//  url: the imgur link (direct image, album, gallery or hash link)
//  callback(error, links): called exactly once; links is an array of urls
//Direct image links are returned as-is; every other type is looked up
//through the imgur JSON api.
impurge.purge = function(url, callback) {
    impurge.determine_link_type(url, function(error, type, id, image_url) {
        if (error) {
            callback(error);
            return;
        }
        if (type === 'image_url') {
            callback(null, [image_url]);
            return;
        }

        var api_url;
        if (type === 'album_url' || type === 'gallery_url') {
            api_url = 'http://api.imgur.com/2/album/' + id + ".json";
        } else if (type === 'hash_url') {
            api_url = 'http://api.imgur.com/2/image/' + id + ".json";
        } else {
            //stop here: this used to fall through and request an undefined url
            callback("unknown_link_error");
            return;
        }

        request(api_url, function(err, res, body) {
            if (err) {
                //transport failures used to be reported as a JSON parsing error
                callback(err);
                return;
            }
            var api_json;
            try {
                api_json = JSON.parse(body);
            } catch (parse_error) {
                callback("impurge: JSON parsing error w/following URL: " + api_url);
                return;
            }

            var links = [];
            var found = false;
            if (api_json && api_json['image']) {
                found = true;
                links.push(api_json['image']['links']['original']);
            }
            if (api_json && api_json['album']) {
                found = true;
                var images_json = api_json['album']['images'];
                //for-in kept from the original; presumably images is an
                //array, but this also works if the api returns an object
                for (var image in images_json) {
                    links.push(images_json[image]['links']['original']);
                }
            }
            if (!found) {
                //previously the callback was never invoked in this case
                callback("impurge: no image or album data returned for URL: " + api_url);
                return;
            }
            callback(null, links);
        });
    });
};
{
"name": "impurge",
"main": "impurge.js",
"subdomain": "impurge",
"scripts": {
"start": "impurge.js",
"test": "mocha -t 4000 test/test.js"
},
"version": "0.0.5",
"description": "takes any imgur url and returns an array of direct image files",
"engines": {
"node": "0.8.x"
},
"dependencies": {
"mocha": "*",
"request": "~2.11.4",
"cheerio": "*"
},
"name": "impurge",
"main": "impurge.js",
"subdomain": "impurge",
"scripts": {
"start": "impurge.js",
"test": "node test/test.js"
},
"version": "0.0.6",
"description": "takes any imgur url and returns an array of direct image files",
"engines": {
"node": "0.8.x"
},
"dependencies": {
"mocha": "*",
"request": "~2.11.4",
"cheerio": "*"
},
"devDependencies": {
"should": "~1.2.1"
},
"keywords": [
"imgur",
"parser",
"image"
],
"author": "hortinstein",
"license": "BSD",
"readmeFilename": "README.md"
}
"should": "~1.2.1"
},
"keywords": [
"imgur",
"parser",
"image"
],
"author": "hortinstein",
"license": "BSD",
"readmeFilename": "README.md"
}

@@ -1,15 +0,41 @@

impurge
#impurge
This is a simple module meant to extract image URLs from imgur.
<pre><code>var impurge = require('impurge');
Example:
```js
var impurge = require('impurge');
impurge.purge("http://imgur.com/IvpcP", function (e,r) {
console.log(r)
});
</code></pre>
```
will give you
will result in:
<code>http://i.imgur.com/IvpcP.jpg
```js
http://i.imgur.com/IvpcP.jpg
```
This works for the three types of imgur links I have encountered, and will return all image links for albums as well.
The included test file checks that the regular expressions are still current by running them against live reddit data: it verifies that imgur has not changed its link formats and reports any imgur link it does not recognize.
I have added a few additional methods that I used for testing the accuracy of my regexes:
```js
impurge.is_imgur(url); //will output true if the url is imgur
```
The following takes a string as input and returns an array of the imgur links it contains:
```js
impurge.get_text_imgur_links(text); // will return an array of links
```
Finally, I exposed a function that determines the link type (previously used only internally):
```js
impurge.determine_link_type(url, function(err, type, id, i_url){
//err is given if link is not recognized
//type is image_url, album_url, gallery_url, hash_url
//id is the id for the link (if applicable)
//i_url is the image url if this is a direct image
});
```

@@ -1,54 +0,58 @@

var should = require('should');
var request = require('request');
var stream = require('stream');
var impurge = require('../impurge.js');
//Live tests for impurge.purge: each case resolves a real imgur url and
//checks how many links come back and what the leading links are.
describe('impurge', function() {
    var cases = [
        {
            suite: 'image url',
            title: 'should find 1 picture',
            url: "http://i.imgur.com/AXvN0Mq.png",
            count: 1,
            links: ['http://i.imgur.com/AXvN0Mq.png']
        },
        {
            suite: 'image hash url',
            title: 'should find 1 picture',
            url: "http://imgur.com/WWm8Cl6",
            count: 1,
            links: ['http://i.imgur.com/WWm8Cl6.jpg']
        },
        {
            suite: 'album url',
            title: 'should find 2 pictures',
            url: "http://imgur.com/a/9uIQf",
            count: 2,
            links: ['http://i.imgur.com/Z6ft3xZ.jpg', 'http://i.imgur.com/xSW842C.jpg']
        },
        {
            suite: 'image hash url',
            title: 'should find 1 picture',
            url: "http://imgur.com/SKiDPaz",
            count: 1,
            links: ['http://i.imgur.com/SKiDPaz.jpg']
        },
        {
            suite: 'gallery hash url',
            title: 'should find 4 pictures',
            url: "http://imgur.com/gallery/Ptn4M",
            count: 4,
            links: ['http://i.imgur.com/tYx0Ebf.gif']
        }
    ];

    cases.forEach(function(test_case) {
        describe(test_case.suite, function() {
            it(test_case.title, function(done) {
                impurge.purge(test_case.url, function(e, r) {
                    //report purge errors as such; they used to be ignored and
                    //surfaced as a TypeError on the undefined result
                    if (e) {
                        done(e instanceof Error ? e : new Error(String(e)));
                        return;
                    }
                    r.length.should.equal(test_case.count);
                    test_case.links.forEach(function(link, index) {
                        r[index].should.equal(link);
                    });
                    done();
                });
            });
        });
    });
});
//This function tests impurge by getting 100 gonewild posts.
//It requests the subreddit listing as JSON and emits a 'url' event on the
//returned stream for every link post (kind 't3') in it.
//  returns: an object-mode Readable that is used purely as an event emitter
//           ('url' for each post url, 'error' when the listing cannot be read)
//Note: as with any emitter, an 'error' event without a listener will throw.
function getImgurPosts() {
    var readStream = new stream.Readable({
        objectMode: true
    });
    //nothing is ever pushed as stream data, so reading is a no-op; the old
    //_read referenced undefined names (started, loop, userJSON, username)
    readStream._read = function(n) {};

    //no try/catch here: request() reports failures asynchronously through
    //its callback, where a surrounding try/catch can never see them
    request({
        url: 'http://www.reddit.com/r/gonewild.json?limit=100&after=',
        json: true
    }, function(err, res, obj) {
        if (err) {
            readStream.emit('error', err);
            return;
        }
        var children = obj && obj.data && obj.data.children;
        if (!Array.isArray(children)) {
            readStream.emit('error', new Error('Unexpected response while retrieving reddit posts'));
            return;
        }
        children.forEach(function(item) {
            if (item.kind === 't3') readStream.emit('url', item.data.url);
        });
    });

    return readStream;
}
//need to look into converting this to the transform type in blog post below
//http://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/
//Check every url coming from reddit: anything that is not an imgur link is
//only logged, while an imgur link whose type cannot be determined is logged
//and then raised as an error.
getImgurPosts().on('url', function(url) {
    var recognised = impurge.is_imgur(url);
    if (!recognised) {
        console.log('**e**NOT IMGUR', url);
        return;
    }
    impurge.determine_link_type(url, function(err, type, id, i_url) {
        if (!err) {
            //known link type: nothing to report
            return;
        }
        console.log('**e**' + err, url);
        throw new Error('unknown imgur link type: ' + url);
    });
});
//this is a test for text it should return 5 links
// var testTextAlbum = 'http://imgur.com/a/o7AVs is a album_url with id: [ o7AVs ] and url undefined ';
// var testTextImage = 'http://i.imgur.com/4aCgHc7.jpg is a image_url with id: null and url http://i.imgur.com/4aCgHc7.jpg ';
// var testTextHash = 'http://i.imgur.com/oM1mFEd.jpg is a hash_url with id: [ oM1mFEd] and url undefined ';
// var testTextAlbum2 = 'http://imgur.com/a/h4gt1 is a album_url with id: [ h4gt1 ] and url undefined';
// var testText = testTextAlbum + testTextImage + testTextHash + testTextAlbum2;
// console.log(testText);
// var comment_links = impurge.get_text_imgur_links(testText);
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc