Comparing version 0.1.0 to 0.2.0
311
fetch.js
var http = require("http"), | ||
https = require("https"), | ||
urllib = require("url"), | ||
zlib = require('zlib'); | ||
utillib = require("util"), | ||
zlib = require('zlib'), | ||
Stream = require("stream").Stream, | ||
CookieJar = require("./cookiejar").CookieJar; | ||
@@ -15,53 +18,106 @@ /* | ||
maxRedirects : 10 | ||
disableRedirects : false | ||
headers: {} | ||
maxResponseLength : Infinity | ||
method: GET | ||
payload: str | ||
cookies: ['name=val'] | ||
*/ | ||
module.exports = fetch; | ||
exports.FetchStream = FetchStream; | ||
function fetch(url, options, callback){ | ||
function FetchStream(url, options){ | ||
Stream.call(this); | ||
if(!callback && typeof options == "function"){ | ||
callback = options; | ||
options = undefined; | ||
this.url = url; | ||
if(!this.url){ | ||
return this.emit("error", new Error("url not defined")); | ||
} | ||
options = options || {}; | ||
if(typeof options.maxredirects != "number" && !(options.maxredirects instanceof Number)){ | ||
options.maxredirects = 10; | ||
this.userAgent = "FetchStream"; | ||
this.cookieJar = new CookieJar(); | ||
this._redirect_count = 0; | ||
this.options = options || {}; | ||
this.normalizeOptions(); | ||
this.runStream(url); | ||
} | ||
utillib.inherits(FetchStream, Stream); | ||
FetchStream.prototype.normalizeOptions = function(){ | ||
// default redirects - 10 | ||
// if disableRedirect is set, then 0 | ||
if(!this.options.disableRedirect && typeof this.options.maxredirects != "number" && | ||
!(this.options.maxredirects instanceof Number)){ | ||
this.options.maxRedirects = 10; | ||
}else if(this.options.disableRedirects){ | ||
this.options.maxRedirects = 0; | ||
} | ||
// todo: redirects | ||
function go(url, i){ | ||
i = i || 0; | ||
// normalize header keys | ||
// HTTP and HTTPS takes in key names in case insensitive but to find | ||
// an exact value from an object key name needs to be case sensitive | ||
// so we're just lowercasing all input keys | ||
this.options.headers = this.options.headers || {}; | ||
get(url, options, function(error, response){ | ||
if(error){ | ||
return callback(error); | ||
} | ||
if([301, 302].indexOf(response.status)>=0 && response.headers.location){ | ||
i++; | ||
if(i>options.maxredirects){ | ||
return callback(null, response); | ||
} | ||
go(response.headers.location, i); | ||
}else{ | ||
return callback(null, response); | ||
} | ||
}); | ||
var keys = Object.keys(this.options.headers), | ||
newheaders = {}, | ||
i; | ||
for(i=keys.length-1; i>=0; i--){ | ||
newheaders[keys[i].toLowerCase().trim()] = this.options.headers[keys[i]]; | ||
} | ||
go(url); | ||
this.options.headers = newheaders; | ||
} | ||
if(!this.options.headers["user-agent"]){ | ||
this.options.headers["user-agent"] = this.userAgent; | ||
} | ||
function get(url, options, callback){ | ||
if(!this.options.headers["pragma"]){ | ||
this.options.headers["pragma"] = "no-cache"; | ||
} | ||
if(!callback && typeof options == "function"){ | ||
callback = options; | ||
options = undefined; | ||
if(!this.options.headers["cache-control"]){ | ||
this.options.headers["cache-control"] = "no-cache"; | ||
} | ||
options = options || {}; | ||
if(!this.options.disableGzip){ | ||
this.options.headers['accept-encoding'] = 'gzip, deflate'; | ||
}else{ | ||
delete this.options.headers['accept-encoding']; | ||
} | ||
options.maxresponse = options.maxresponse || (100*1024); // 100kB | ||
// max length for the response, | ||
// if not set, default is Infinity | ||
if(!this.options.maxResponseLength){ | ||
this.options.maxResponseLength = Infinity; | ||
} | ||
// method: | ||
// defaults to GET, or when payload present to POST | ||
if(!this.options.method){ | ||
this.options.method = this.options.payload?"POST":"GET"; | ||
} | ||
// set cookies | ||
// takes full cookie definition strings as params | ||
if(this.options.cookies){ | ||
for(var i=0; i<this.options.cookies.length; i++){ | ||
this.cookieJar.setCookie(this.options.cookies[i], this.url); | ||
} | ||
} | ||
} | ||
FetchStream.prototype.parseUrl = function(url){ | ||
var urlparts = urllib.parse(url, false, true), | ||
@@ -73,3 +129,3 @@ transport, | ||
path: urlparts.pathname + (urlparts.search || "") || "/", | ||
method: options.payload?'POST':'GET' | ||
method: this.options.method | ||
}; | ||
@@ -83,3 +139,3 @@ | ||
break; | ||
case "https:": | ||
case "http:": | ||
default: | ||
@@ -92,23 +148,107 @@ urloptions.port = 80; | ||
if(options.headers){ | ||
urloptions.headers = options.headers; | ||
}else{ | ||
urloptions.headers = {}; | ||
urloptions.headers = this.options.headers; | ||
return { | ||
urloptions: urloptions, | ||
transport: transport | ||
} | ||
} | ||
if(!options.nocompress){ | ||
urloptions.headers['Accept-Encoding'] = 'gzip'; | ||
FetchStream.prototype.setEncoding = function(encoding){ | ||
this.options.encoding = encoding; | ||
} | ||
FetchStream.prototype.absoluteUrl = function(url, base){ | ||
var target_url = urllib.parse(url, false, true), | ||
base_url = urllib.parse(base || "", false, true), | ||
base_path, target_path, final_path; | ||
// if protocol is set, then it's good to go | ||
if(target_url.protocol){ | ||
return url; | ||
} | ||
var req = transport.request(urloptions, function(res) { | ||
// the url might be in the form of "//www.example.com" with leading slashes - | ||
// the protocol from the base url must be used, defaults to http | ||
if(target_url.hostname){ | ||
return (base_url.protocol || "http:") + (url.substr(0,2)!="//"?"//":"") + url; | ||
} | ||
var responseBody = new Buffer(0), | ||
currentPart, | ||
// this is absolute path for relative domain | ||
if(target_url.pathname.substr(0,1)=="/"){ | ||
return (base_url.protocol || "http:") + "//" + (base_url.hostname || "") + url; | ||
} | ||
// relative path | ||
// remove also .. and . directory references | ||
base_path = (base_url.pathname || "/").split("/"); | ||
base_path.pop(); // ditch the last element, empty for dir or a file name | ||
target_path = (target_url.pathname || "/").split("/"); | ||
target_path = base_path.concat(target_path); | ||
final_path = []; | ||
target_path.forEach(function(dir){ | ||
if(dir=="."){ | ||
return; | ||
} | ||
if(dir==".."){ | ||
final_path.pop(); | ||
return; | ||
} | ||
if(dir){ | ||
final_path.push(dir); | ||
} | ||
}); | ||
return (base_url.protocol || "http:") + "//" + (base_url.hostname || "") + "/" + | ||
final_path.join("/") + (target_url.search || ""); | ||
} | ||
FetchStream.prototype.runStream = function(url){ | ||
var url_data = this.parseUrl(url), | ||
cookies = this.cookieJar.getCookies(url); | ||
if(cookies){ | ||
url_data.urloptions.headers.cookie = cookies; | ||
}else{ | ||
delete url_data.urloptions.headers.cookie; | ||
} | ||
var req = url_data.transport.request(url_data.urloptions, (function(res) { | ||
// catch new cookies before potential redirect | ||
if(Array.isArray(res.headers['set-cookie'])){ | ||
for(var i=0; i<res.headers['set-cookie'].length; i++){ | ||
this.cookieJar.setCookie(res.headers['set-cookie'][i], url) | ||
} | ||
} | ||
if([301, 302].indexOf(res.statusCode)>=0){ | ||
if(!this.options.disableRedirects && this.options.maxRedirects>this._redirect_count && res.headers.location){ | ||
this._redirect_count++; | ||
this.runStream(this.absoluteUrl(res.headers.location, url)); | ||
return; | ||
} | ||
} | ||
this.meta = { | ||
status: res.statusCode, | ||
responseHeaders: res.headers, | ||
finalUrl: url, | ||
redirectCount: this._redirect_count, | ||
cookieJar: this.cookieJar | ||
} | ||
var curlen = 0, | ||
maxlen, | ||
unpack, | ||
receive = function(chunk){ | ||
receive = (function(chunk){ | ||
if(responseBody.length + chunk.length>options.maxresponse){ | ||
maxlen = options.maxresponse - responseBody.length; | ||
if(curlen + chunk.length > this.options.maxResponseLength){ | ||
maxlen = this.options.maxResponseLength - curlen; | ||
}else{ | ||
@@ -119,32 +259,43 @@ maxlen = chunk.length; | ||
currentPart = new Buffer(responseBody.length + maxlen); | ||
responseBody.copy(currentPart); | ||
chunk.copy(currentPart, responseBody.length, 0, maxlen); | ||
responseBody = currentPart; | ||
}, | ||
curlen += Math.min(maxlen, chunk.length); | ||
end = function(){ | ||
callback(null, { | ||
status: res.statusCode, | ||
headers: res.headers, | ||
body: responseBody | ||
}); | ||
} | ||
if(maxlen>=chunk.length){ | ||
if(this.options.encoding){ | ||
this.emit("data", chunk.toString(this.options.encoding)); | ||
}else{ | ||
this.emit("data", chunk); | ||
} | ||
}else{ | ||
if(this.options.encoding){ | ||
this.emit("data", chunk.slice(0, maxlen).toString(this.options.encoding)); | ||
}else{ | ||
this.emit("data", chunk.slice(0, maxlen)); | ||
} | ||
} | ||
}).bind(this), | ||
error = (function(e){ | ||
this.emit("error", e); | ||
}).bind(this), | ||
end = (function(){ | ||
this.emit("end"); | ||
}).bind(this), | ||
unpack = (function(type, res){ | ||
var z = zlib["create"+type](); | ||
z.on("data", receive); | ||
z.on("error", error); | ||
z.on("end", end); | ||
res.pipe(z); | ||
}).bind(this); | ||
this.emit("meta", this.meta); | ||
if(res.headers['content-encoding']){ | ||
switch(res.headers['content-encoding'].toLowerCase().trim()){ | ||
case "gzip": | ||
unpack = zlib.createGunzip(); | ||
unpack.on("data", receive); | ||
unpack.on("error", callback); | ||
unpack.on("end", end); | ||
res.pipe(unpack); | ||
return; | ||
return unpack("Gunzip", res); | ||
case "deflate": | ||
unpack = zlib.createInflateRaw(); | ||
unpack.on("data", receive); | ||
unpack.on("error", callback); | ||
unpack.on("end", end); | ||
res.pipe(unpack); | ||
return; | ||
return unpack("InflateRaw", res); | ||
} | ||
@@ -155,12 +306,14 @@ } | ||
res.on('end', end); | ||
}); | ||
req.on('error', callback); | ||
}).bind(this)); | ||
if(options.payload){ | ||
req.end(options.payload); | ||
req.on('error', (function(e){ | ||
this.emit("error", e); | ||
}).bind(this)); | ||
if(this.options.payload){ | ||
req.end(this.options.payload); | ||
}else{ | ||
req.end(); | ||
} | ||
} | ||
} |
{ | ||
"name": "fetch", | ||
"description": "Fetch URL contents", | ||
"version": "0.1.0", | ||
"version": "0.2.0", | ||
"author" : "Andris Reinman", | ||
@@ -6,0 +6,0 @@ "maintainers":[ |
# fetch | ||
Fetch url contents | ||
Fetch url contents. Supports gzipped content for quicker download, redirects (with automatic cookie handling), streaming, etc. | ||
@@ -11,10 +11,2 @@ ## Install | ||
var fetch = require("fetch"); | ||
fetch("http://www.google.com", function(error, response){ | ||
console.log(response.status); | ||
console.log(response.headers); | ||
console.log(response.body); | ||
}); | ||
See test.js for a complete example | ||
@@ -21,0 +13,0 @@ |
27
test.js
@@ -1,7 +0,22 @@ | ||
var fetch = require("./fetch"); | ||
var FetchStream = require("./fetch").FetchStream; | ||
fetch("https://www.google.com", function(error, contents){ | ||
console.log(error || contents); | ||
console.log(contents.body.toString("utf-8")) | ||
console.log(contents.body.toString("utf-8").length) | ||
}); | ||
var fetch = new FetchStream("http://neti.ee",{ | ||
headers:{} | ||
}); | ||
fetch.on("data", function(chunk){ | ||
console.log(chunk); | ||
}); | ||
fetch.on("meta", function(meta){ | ||
console.log(meta); | ||
}); | ||
fetch.on("end", function(){ | ||
console.log("END"); | ||
}); | ||
fetch.on("error", function(e){ | ||
console.log("ERROR: " + (e && e.message || e)); | ||
}); | ||
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: This package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
15625
6
410
15
7