Comparing version 0.0.5 to 0.0.6
190
impurge.js
@@ -1,4 +0,4 @@ | ||
var cheerio = require("cheerio") | ||
, request = require("request") | ||
var impurge = []; | ||
var $ = require("cheerio"), | ||
request = require("request") | ||
var impurge = []; | ||
@@ -8,94 +8,106 @@ module.exports = impurge; | ||
//pattern used for extraction of the links from the html | ||
var imgur_url_pattern = RegExp("^http://((www)|(i)\.)?imgur.com/[./a-zA-Z0-9&,]+","ig"); | ||
// Matches an imgur URL at the start of a string (bare, "www." or "i." host; http or https).
// Written as a regex literal: inside a string literal "\." collapses to ".", which
// silently turned every intended literal dot into "any character", and the old
// grouping "((www)|(i)\.)?" never accepted "www." hosts at all.
// The g flag is kept for extraction loops; it makes exec()/test() resume from
// lastIndex, so reset lastIndex to 0 before reusing the pattern on a new string.
var imgur_url_pattern = /^https?:\/\/((www)\.|(i)\.)?imgur\.com\/[.\/a-zA-Z0-9&,]+/ig;
//patterns used to check URL patterns | ||
var imgur_album_url_pattern = RegExp("^http://(?:www\.)?imgur\.com/a/([a-zA-Z0-9]+)","i"); | ||
var imgur_gallery_url_pattern = RegExp("^http://(?:www\.)?imgur\.com/gallery/([a-zA-Z0-9]+)","i"); | ||
var imgur_hashes_pattern = RegExp("imgur\.com/(([a-zA-Z0-9]{5,7}[&,]?)+)","i"); | ||
var imgur_image_pattern = RegExp("^http://(www\.)?(i\.)?imgur\.com/.{3,7}\.((jpg)|(gif))","ig"); | ||
// Patterns used to classify a URL (album, gallery, bare hash list, direct image).
// They are regex literals on purpose: in a string literal "\." is just ".", so the
// string-built versions matched any character where a literal dot was intended.
// None of them carries the g flag: a global regex remembers lastIndex between
// exec() calls, which made every second direct-image URL fail to match.
// Both http and https are accepted.
var imgur_album_url_pattern = /^https?:\/\/(?:www\.)?imgur\.com\/a\/([a-zA-Z0-9]+)/i;
var imgur_gallery_url_pattern = /^https?:\/\/(?:www\.)?imgur\.com\/gallery\/([a-zA-Z0-9]+)/i;
var imgur_hashes_pattern = /imgur\.com\/(([a-zA-Z0-9]{5,7}[&,]?)+)/i;
var imgur_image_pattern = /^https?:\/\/(www\.)?(i\.)?imgur\.com\/.{3,7}\.((jpg)|(gif)|(png))/i;
/**
 * Extracts every imgur link contained in a piece of text.
 *
 * @param {string} text - text to scan
 * @returns {string[]} the matched URLs in order of appearance (empty when none)
 */
impurge.get_text_imgur_links = function(text) {
    // A fresh global regex per call, so lastIndex always starts at 0.
    // Regex literal: "\." inside a string literal would degrade to "any character".
    var imgur_url_pattern = /https?:\/\/(?:m\.|www\.|i\.)?imgur\.com\/(?:a\/|gallery\/)?[a-zA-Z0-9&]+(?:\.jpg|\.gif|\.png)?/ig;
    var urls = [];
    var matches;
    // No priming exec() before the loop: with the g flag that call would consume
    // (and lose) the first link in the text.
    while ((matches = imgur_url_pattern.exec(text)) !== null) {
        urls.push(matches[0]);
    }
    return urls;
};
/**
 * Tells whether a string contains an imgur link (the pattern is not anchored,
 * so the link may appear anywhere in the string).
 *
 * @param {string} url - candidate URL
 * @returns {boolean} true when an imgur link is found, false otherwise
 */
impurge.is_imgur = function(url) {
    // Regex literal so that the dots are literal; built from a string, "\." is
    // just "." and "imgurXcom" would be accepted.
    var imgur_url_pattern = /https?:\/\/(?:m\.|www\.|i\.)?imgur\.com\/(?:a\/|gallery\/)?[a-zA-Z0-9&]+(?:\.jpg|\.gif|\.png)?/i;
    // test() always yields a real boolean (the old code returned undefined on a miss).
    return imgur_url_pattern.test(url);
};
//determines the link provided to module | ||
var determine_link_type = function (url, callback) { | ||
if ( imgur_image_pattern.exec(url) ) { | ||
callback(null,'image_url',null,url); | ||
} | ||
else if ( imgur_album_url_pattern.exec(url) ) { | ||
var match = imgur_album_url_pattern.exec(url); | ||
if (match){ | ||
var hashes = match[1].split(/[,&]/) | ||
} | ||
callback(null,'album_url',hashes); | ||
} | ||
else if ( imgur_gallery_url_pattern.exec(url) ) { | ||
var match = imgur_gallery_url_pattern.exec(url); | ||
if (match){ | ||
var hashes = match[1].split(/[,&]/); | ||
} | ||
callback(null,'gallery_url',hashes); | ||
} | ||
else if ( imgur_hashes_pattern.exec(url) ) { | ||
var match = imgur_hashes_pattern.exec(url); | ||
if (match){ | ||
var hashes = match[1].split(/[,&]/); | ||
} | ||
callback(null,'hash_url',hashes); | ||
} | ||
else { | ||
callback('unidentified_type'); | ||
} | ||
/**
 * Classifies an imgur URL and reports the result through a callback.
 *
 * @param {string} url - URL to classify
 * @param {function} callback - called exactly once as callback(err, type, id, i_url):
 *   - err   'unidentified_type' when no pattern matches, otherwise null
 *   - type  'image_url', 'album_url', 'gallery_url' or 'hash_url'
 *   - id    array of hashes taken from the URL (null for 'image_url')
 *   - i_url the URL itself, passed only for 'image_url'
 */
impurge.determine_link_type = function(url, callback) {
    // Splits a captured "hash1,hash2&hash3" group; a trailing separator must not
    // produce an empty hash.
    var split_hashes = function(captured) {
        return captured.split(/[,&]/).filter(function(hash) {
            return hash.length > 0;
        });
    };

    // The image pattern may be declared with the g flag, in which case it resumes
    // from lastIndex and every second image URL would fail to match. Always start
    // from the beginning of the string.
    imgur_image_pattern.lastIndex = 0;
    if (imgur_image_pattern.test(url)) {
        callback(null, 'image_url', null, url);
        return;
    }

    // Each pattern is run once and its match reused (the old code ran every regex twice).
    var match = imgur_album_url_pattern.exec(url);
    if (match) {
        callback(null, 'album_url', split_hashes(match[1]));
        return;
    }

    match = imgur_gallery_url_pattern.exec(url);
    if (match) {
        callback(null, 'gallery_url', split_hashes(match[1]));
        return;
    }

    match = imgur_hashes_pattern.exec(url);
    if (match) {
        callback(null, 'hash_url', split_hashes(match[1]));
        return;
    }

    callback('unidentified_type');
};
impurge.purge = function (url, callback) { | ||
determine_link_type(url, function (error, type, id,url) { | ||
if (error) { | ||
callback(error) | ||
} | ||
else{ | ||
var links = []; | ||
if (type === 'image_url'){ | ||
callback(null, [url]) | ||
return; | ||
} | ||
else if (type === 'album_url'){ | ||
var url = 'http://api.imgur.com/2/album/'+id+".json" | ||
} | ||
else if (type === 'hash_url'){ | ||
var url = 'http://api.imgur.com/2/image/'+id+".json" | ||
} | ||
else if (type === 'gallery_url'){ | ||
var url = 'http://api.imgur.com/2/album/'+id+".json" | ||
} | ||
else { | ||
callback("unknown_link_error") | ||
} | ||
request(url, function (err, res, body) { | ||
try{ | ||
var api_json = JSON.parse(body); | ||
} catch (err) { | ||
callback("impurge: JSON parsing error w/following URL: "+ url); | ||
} | ||
for (var type in api_json){ | ||
//console.log(type) | ||
if (type === 'image'){ | ||
links.push(api_json[type]['links']['original']); | ||
callback(null, links ) ; | ||
} | ||
if (type === 'album'){ | ||
var images_json = api_json[type]['images']; | ||
for (var image in images_json){ | ||
//console.log(images_json[image]['links']['original']); | ||
links.push(images_json[image]['links']['original']); | ||
} | ||
callback(null, links); | ||
} | ||
} | ||
return; | ||
}) | ||
} | ||
//console.log("type: "+ type) | ||
}) | ||
} | ||
/**
 * Resolves an imgur URL to the direct image link(s) behind it.
 *
 * Direct image URLs are returned as-is; album, gallery and hash URLs are looked
 * up through the imgur JSON API.
 *
 * @param {string} url - any imgur URL
 * @param {function} callback - called exactly once as callback(err, links), where
 *   links is an array of direct image URLs. err is set when the link type is not
 *   recognised, the HTTP request fails, or the API response cannot be used.
 */
impurge.purge = function(url, callback) {
    impurge.determine_link_type(url, function(error, type, id, image_url) {
        if (error) {
            callback(error);
            return;
        }
        if (type === 'image_url') {
            callback(null, [image_url]);
            return;
        }

        var api_url;
        if (type === 'album_url' || type === 'gallery_url') {
            // galleries are looked up through the album endpoint as well
            api_url = 'http://api.imgur.com/2/album/' + id + ".json";
        } else if (type === 'hash_url') {
            api_url = 'http://api.imgur.com/2/image/' + id + ".json";
        } else {
            // Stop here: without the return the request below would still be sent.
            callback("unknown_link_error");
            return;
        }

        request(api_url, function(err, res, body) {
            if (err) {
                callback(err);
                return;
            }

            var api_json;
            try {
                api_json = JSON.parse(body);
            } catch (parse_error) {
                callback("impurge: JSON parsing error w/following URL: " + api_url);
                return;
            }

            var links = [];
            var recognized = false;
            try {
                if (api_json.image) {
                    recognized = true;
                    links.push(api_json.image.links.original);
                }
                if (api_json.album) {
                    recognized = true;
                    var images = api_json.album.images || {};
                    Object.keys(images).forEach(function(key) {
                        links.push(images[key].links.original);
                    });
                }
            } catch (shape_error) {
                // a payload missing the expected "links.original" fields
                recognized = false;
            }

            if (!recognized) {
                // e.g. an API error document: report it instead of never calling back
                callback("impurge: unexpected API response for following URL: " + api_url);
                return;
            }
            // single completion call, even when the payload has both keys
            callback(null, links);
        });
    });
};
{ | ||
"name": "impurge", | ||
"main": "impurge.js", | ||
"subdomain": "impurge", | ||
"scripts": { | ||
"start": "impurge.js", | ||
"test": "mocha -t 4000 test/test.js" | ||
}, | ||
"version": "0.0.5", | ||
"description": "takes any imgur url and returns an array of direct image files", | ||
"engines": { | ||
"node": "0.8.x" | ||
}, | ||
"dependencies": { | ||
"mocha": "*", | ||
"request": "~2.11.4", | ||
"cheerio": "*" | ||
}, | ||
"name": "impurge", | ||
"main": "impurge.js", | ||
"subdomain": "impurge", | ||
"scripts": { | ||
"start": "impurge.js", | ||
"test": "node test/test.js" | ||
}, | ||
"version": "0.0.6", | ||
"description": "takes any imgur url and returns an array of direct image files", | ||
"engines": { | ||
"node": "0.8.x" | ||
}, | ||
"dependencies": { | ||
"mocha": "*", | ||
"request": "~2.11.4", | ||
"cheerio": "*" | ||
}, | ||
"devDependencies": { | ||
"should": "~1.2.1" | ||
}, | ||
"keywords": [ | ||
"imgur", | ||
"parser", | ||
"image" | ||
], | ||
"author": "hortinstein", | ||
"license": "BSD", | ||
"readmeFilename": "README.md" | ||
} | ||
"should": "~1.2.1" | ||
}, | ||
"keywords": [ | ||
"imgur", | ||
"parser", | ||
"image" | ||
], | ||
"author": "hortinstein", | ||
"license": "BSD", | ||
"readmeFilename": "README.md" | ||
} |
@@ -1,15 +0,41 @@ | ||
impurge | ||
#impurge | ||
This is a simple module that extracts direct image URLs from Imgur links. | ||
<pre><code>var impurge = require('impurge'); | ||
Example: | ||
```js | ||
var impurge = require('impurge'); | ||
impurge.purge("http://imgur.com/IvpcP", function (e,r) { | ||
console.log(r) | ||
}); | ||
</code></pre> | ||
``` | ||
will give you | ||
will result in: | ||
<code>http://i.imgur.com/IvpcP.jpg | ||
```js | ||
http://i.imgur.com/IvpcP.jpg | ||
``` | ||
This works for the three types of Imgur links I have encountered, and returns every image link for albums as well. | ||
The included test file checks that the regular expressions are still current by running them against live reddit data: it flags any link it does not recognize, so a change in Imgur's link formats is caught early. | ||
I have also added a few methods that I used to test the accuracy of my regexes: | ||
```js | ||
impurge.is_imgur(url); //will output true if the url is imgur | ||
``` | ||
The following takes a string and returns an array of the Imgur links it contains: | ||
```js | ||
impurge.get_text_imgur_links(text); // will return an array of links | ||
``` | ||
Finally, I exposed the function that determines the link type (previously used internally): | ||
```js | ||
impurge.determine_link_type(url, function(err, type, id, i_url){ | ||
//err is given if link is not recognized | ||
//type is image_url, album_url, gallery_url, hash_url | ||
//id is the id for the link (if applicable) | ||
//i_url is the image url if this is a direct image | ||
}); | ||
``` |
104
test/test.js
@@ -1,54 +0,58 @@ | ||
var should = require('should'); | ||
var request = require('request'); | ||
var stream = require('stream'); | ||
var impurge = require('../impurge.js'); | ||
describe('impurge', function(){ | ||
describe('image url', function () { | ||
it('should find 1 picture', function(done){ | ||
impurge.purge("http://i.imgur.com/AXvN0Mq.png", function (e,r) { | ||
r.length.should.equal(1) | ||
r[0].should.equal('http://i.imgur.com/AXvN0Mq.png'); | ||
done(); | ||
}) | ||
}); | ||
}); | ||
describe('image hash url', function () { | ||
it('should find 1 picture', function(done){ | ||
impurge.purge("http://imgur.com/WWm8Cl6", function (e,r) { | ||
r.length.should.equal(1); | ||
r[0].should.equal('http://i.imgur.com/WWm8Cl6.jpg'); | ||
done(); | ||
}) | ||
}); | ||
}); | ||
describe('album url', function () { | ||
it('should find 2 pictures', function(done){ | ||
impurge.purge("http://imgur.com/a/9uIQf", function (e,r) { | ||
r.length.should.equal(2); | ||
r[0].should.equal('http://i.imgur.com/Z6ft3xZ.jpg'); | ||
r[1].should.equal('http://i.imgur.com/xSW842C.jpg'); | ||
done(); | ||
}) | ||
}); | ||
}); | ||
describe('image hash url', function () { | ||
it('should find 1 picture', function(done){ | ||
impurge.purge("http://imgur.com/SKiDPaz", function (e,r) { | ||
r.length.should.equal(1); | ||
r[0].should.equal('http://i.imgur.com/SKiDPaz.jpg'); | ||
done(); | ||
}) | ||
}); | ||
}); | ||
describe('gallery hash url', function () { | ||
it('should find 4 pictures', function(done){ | ||
impurge.purge("http://imgur.com/gallery/Ptn4M", function (e,r) { | ||
r.length.should.equal(4); | ||
r[0].should.equal('http://i.imgur.com/tYx0Ebf.gif'); | ||
done(); | ||
}) | ||
}); | ||
}); | ||
}); | ||
/**
 * Fetches one page (limit=100) of the reddit listing below and re-emits the URL
 * of every link post as a 'url' event.
 *
 * The returned object-mode Readable is only used as an event emitter: nothing is
 * ever pushed into it, listeners subscribe with .on('url', fn).
 *
 * @returns {stream.Readable} emits 'url' (string) per link post, and 'error' when
 *   the request fails or the response is not a listing.
 */
function getImgurPosts() {
    var readStream = new stream.Readable({
        objectMode: true
    });
    // Nothing is produced through the stream itself, so there is nothing to do on
    // read. (The old handler referenced names that were never defined.)
    readStream._read = function() {};

    // No try/catch here: the request completes asynchronously, so failures arrive
    // through `err` in the callback, not as exceptions at the call site.
    request({
        url: 'http://www.reddit.com/r/gonewild.json?limit=100&after=',
        json: true
    }, function(err, res, obj) {
        if (err) {
            readStream.emit('error', err);
            return;
        }
        if (!obj || !obj.data || !Array.isArray(obj.data.children)) {
            readStream.emit('error', new Error('unexpected reddit listing response'));
            return;
        }
        obj.data.children.forEach(function(item) {
            // 't3' is the listing kind this script treats as a link post
            if (item.kind === 't3') readStream.emit('url', item.data.url);
        });
    });

    return readStream;
}
//need to look into converting this to the transform type in blog post below | ||
//http://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/ | ||
// Live check: every URL coming off the reddit listing is either reported as not
// being an imgur link, or must be classifiable by impurge. An imgur link of an
// unknown type is logged and aborts the run.
getImgurPosts().on('url', function(post_url) {
    var looks_like_imgur = impurge.is_imgur(post_url);
    if (!looks_like_imgur) {
        console.log('**e**NOT IMGUR', post_url);
        return;
    }
    impurge.determine_link_type(post_url, function(err, type, id, i_url) {
        if (!err) {
            // recognised link type: nothing to report
            return;
        }
        console.log('**e**' + err, post_url);
        throw new Error('unknown imgur link type: ' + post_url)
    });
});
//this is a test for text it should return 5 links | ||
// var testTextAlbum = 'http://imgur.com/a/o7AVs is a album_url with id: [ o7AVs ] and url undefined '; | ||
// var testTextImage = 'http://i.imgur.com/4aCgHc7.jpg is a image_url with id: null and url http://i.imgur.com/4aCgHc7.jpg '; | ||
// var testTextHash = 'http://i.imgur.com/oM1mFEd.jpg is a hash_url with id: [ oM1mFEd] and url undefined '; | ||
// var testTextAlbum2 = 'http://imgur.com/a/h4gt1 is a album_url with id: [ h4gt1 ] and url undefined'; | ||
// var testText = testTextAlbum + testTextImage + testTextHash + testTextAlbum2; | ||
// console.log(testText); | ||
// var comment_links = impurge.get_text_imgur_links(testText); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Wildcard dependency
Quality: Package has a dependency with a floating version range. This can cause issues if the dependency publishes a new major version.
Found 2 instances in 1 package
8271
151
42
1