Comparing version 0.0.0 to 0.0.1
102
github.js
'use strict'; | ||
var url = require('url'); | ||
var debug = require('debug')('licenses::github') | ||
, url = require('url'); | ||
@@ -43,3 +44,49 @@ /** | ||
parse: function parse(data, next) { | ||
data = this.get(data); | ||
// | ||
// We cannot detect a license so we call the callback without any arguments | ||
// which symbolises a failed attempt. | ||
// | ||
if (!data) return next(); | ||
var parser = this; | ||
this.exists(data, function exists(err, url) { | ||
if (err || !url) return next(err); | ||
var github = parser.repo(url) | ||
, license; | ||
parser.root(github, function root(err, files) { | ||
if (err || !files || !files.length) return next(err); | ||
// | ||
// Fetch and parse the 'raw' content of the file so we can parse it. | ||
// | ||
parser.async.doWhilst(function does(next) { | ||
var file = files.shift(); | ||
debug('searching %s for license information', file.name); | ||
parser.raw(github, file.name, function raw(err, data) { | ||
if (err) return next(err); | ||
parser.parsers.content.parse({ | ||
content: data, | ||
file: file.name | ||
}, function parse(err, data) { | ||
license = data; | ||
if (license) debug('extracted %s from %s', data, file.name); | ||
next(err); | ||
}); | ||
}); | ||
}, function select() { | ||
return !license && files.length; | ||
}, function done(err) { | ||
next(err, parser.normalize(license)); | ||
}); | ||
}); | ||
}); | ||
}, | ||
@@ -55,2 +102,4 @@ | ||
repo: function repo(github) { | ||
if (!github) return {}; | ||
var parsed = url.parse(github) | ||
@@ -60,4 +109,4 @@ , parts = parsed.pathname.split('/'); | ||
return { | ||
user: parts[0], | ||
repo: parts[1] | ||
user: parts[1], | ||
repo: parts[2] | ||
}; | ||
@@ -76,7 +125,7 @@ }, | ||
this.request({ | ||
uri: 'https://raw.github.com/'+ github.user +'/'+ github.repo +'/'+ file, | ||
uri: 'https://raw.github.com/'+ github.user +'/'+ github.repo +'/master/'+ file, | ||
method: 'GET' | ||
}, function fetched(err, res, body) { | ||
if (err) return next(err); | ||
if (res.statusCode !== 200) return next(new Error('Invalid status code')); | ||
if (err || res.statusCode === 404) return next(err); | ||
if (res.statusCode !== 200) return next(new Error('Invalid status code (raw:'+ res.statusCode +')')); | ||
@@ -96,11 +145,17 @@ next(undefined, body); | ||
root: function root(github, next) { | ||
var parser = this; | ||
var url = 'https://api.github.com/repos/'+ github.user +'/'+ github.repo +'/contents' | ||
, parser = this; | ||
debug('retreiving file list from %s', url); | ||
this.request({ | ||
uri: 'https://api.github.com/repos/'+ github.user +'/'+ github.repo +'/contents', | ||
uri: url, | ||
method: 'GET', | ||
headers: { | ||
'User-Agent': 'npm.im/licenses' | ||
}, | ||
json: true | ||
}, function fetched(err, res, files) { | ||
if (err) return next(err); | ||
if (res.statusCode !== 200) return next(new Error('Invalid status code')); | ||
if (err || res.statusCode === 404) return next(err); | ||
if (res.statusCode !== 200) return next(new Error('Invalid status code (root:'+ res.statusCode +')')); | ||
@@ -120,2 +175,29 @@ // | ||
/** | ||
* It's possible that a user has moved the repository to a new location. | ||
* Github automatically redirects you when you access the old page. But it | ||
* doesn't provide any redirection for API calls causing them to fail with | ||
* 404's. | ||
* | ||
* In order to detect the correct repository location we need to do a HEAD | ||
* check of the public github URL and use the location header as source URL | ||
* when we're presented with a 301 status code. | ||
* | ||
* | ||
* @param {String} url The possible location of the repository. | ||
* @param {Function} next Continuation | ||
* @api private | ||
*/ | ||
exists: function exists(url, next) { | ||
var github = this.repo(url); | ||
this.request({ | ||
uri: 'https://github.com/'+ github.user +'/'+ github.repo, | ||
method: 'HEAD' | ||
}, function fetched(err, res, data) { | ||
if (err) return next(err); | ||
next(undefined, res.request.href || url); | ||
}); | ||
}, | ||
/** | ||
* Is github based license detection an option for this package. | ||
@@ -122,0 +204,0 @@ * |
118
index.js
'use strict'; | ||
exports.parsers = { | ||
markdown: new(require('./markdown')), | ||
github: new(require('./github')), | ||
npm: new(require('./npm')) | ||
}; | ||
var debug = require('debug')('licenses::parse') | ||
, request = require('request') | ||
, async = require('async') | ||
, url = require('url'); | ||
/**
 * Detect the license information of a module.
 *
 * When `name` is a string it is treated as a module name and the package data
 * is fetched from the registry first. Any other value is handed to the parsers
 * as-is. The parsers listed in `options.order` are tried one by one until one
 * of them yields a result.
 *
 * The supplied `options` object is never modified, so the same object can be
 * re-used for multiple calls.
 *
 * @param {String|Object} name The module name or the package.json contents.
 * @param {Object} options Configuration of the parse process.
 * @param {String} options.registry Registry URL, defaults to http://registry.nodejitsu.com
 * @param {Array} options.order Parser names in the order they should be tried.
 * @param {Function} fn Callback, receives an error and the detected license(s).
 * @api public
 */
function parse(name, options, fn) {
  if ('function' === typeof options) {
    fn = options;
    options = null;
  }

  options = options || {};

  //
  // Work with local values: the queue is consumed with `shift()` below, and
  // doing that on `options.order` directly would empty the caller's array.
  //
  var registry = options.registry || 'http://registry.nodejitsu.com'
    , queue = (options.order || ['npm', 'content', 'github']).slice();

  async.waterfall([
    //
    // Make sure that we have the correct contents to start searching for
    // license information.
    //
    function fetch(next) {
      if ('string' !== typeof name) return next(undefined, name);

      debug('was given a string, retreiving package from npm : %s', registry);

      request({
        uri: url.resolve(registry, name),
        method: 'GET',
        json: true
      }, function fetched(err, res, data) {
        if (err) return next(err);
        if (res.statusCode !== 200) return next(new Error('Invalid statusCode: '+ res.statusCode));

        //
        // With npm you can never be sure of the data structure. We want to get
        // the latest package from the data structure so we need double, triple
        // checks. The truthiness checks matter: `typeof null` is 'object' and
        // the `in` operator throws when its right-hand side is not an object.
        //
        var tags = data && 'object' === typeof data && data['dist-tags']
          , versions = tags && data.versions;

        if (
             tags
          && 'object' === typeof tags
          && versions
          && 'object' === typeof versions
          && 'latest' in tags
          && tags.latest in versions
        ) {
          var readme = data.readme;

          data = versions[tags.latest];
          data.readme = data.readme || readme;

          debug('found "dist-tags" updating data to latest version');
        }

        next(undefined, data);
      });
    },

    //
    // Search for the correct way of parsing out the license information.
    //
    function search(data, next) {
      if (!queue.length) return next();

      var result;

      async.doWhilst(function does(next) {
        var name = queue.shift()
          , parser = parse.parsers[name];

        //
        // An unknown parser name cannot produce a result, skip it instead of
        // throwing a TypeError on `undefined.supported`.
        //
        if (!parser) {
          debug('skipping unknown parser: %s', name);
          return next();
        }

        if (!parser.supported(data)) return next();

        debug('attempting to extract the license information using: %s', name);

        parser.parse(data, function parsed(err, license) {
          if (err) return next(err);

          result = license;
          if (result) debug('parsing with %s was successful', name);

          next();
        });
      }, function select() {
        return !result && queue.length;
      }, function done(err) {
        next(err, result);
      });
    }
  ], fn);
}
// | ||
// Expose our primary parsers that we can leverage to retrieve license content. | ||
// | ||
parse.parsers = {}; | ||
parse.parsers.content = new(require('./content'))(parse.parsers); | ||
parse.parsers.github = new(require('./github'))(parse.parsers); | ||
parse.parsers.npm = new(require('./npm'))(parse.parsers); | ||
// | ||
// Expose the Parser class so we easily add new parsers through third-party if | ||
// needed. (Think bitbucket and other code hosting sites) | ||
// | ||
parse.Parser = require('./Parser'); | ||
// | ||
// Expose the actual module. | ||
// | ||
module.exports = parse; |
72
npm.js
'use strict'; | ||
var debug = require('debug')('licenses::npm'); | ||
/** | ||
* Parser for npm based license information. | ||
* | ||
* @constructor | ||
* @api public | ||
*/ | ||
@@ -15,7 +20,27 @@ module.exports = require('./parser').extend({ | ||
parse: function parse(data, next) { | ||
data = this.get(data); | ||
// | ||
// We cannot detect a license so we call the callback without any arguments | ||
// which symbolises a failed attempt. | ||
// | ||
if (!data) return next(); | ||
debug('found %s in the package contents', data); | ||
// @TODO handle the edge case where people give us an URL instead of an | ||
// actual license. | ||
next(undefined, this.normalize(data)); | ||
}, | ||
get: function get() { | ||
/** | ||
* @param {Object} data The object that should contain the license. | ||
* @returns {String} | ||
* @api private | ||
*/ | ||
license: function licenses(data) { | ||
if ('string' === typeof data && data) return data; | ||
if ('type' in data && data.type) return data.type; | ||
return; | ||
}, | ||
@@ -31,6 +56,45 @@ | ||
supported: function supported(data) { | ||
return 'string' === typeof data.license | ||
|| 'string' === typeof data.licenses | ||
|| 'object' === typeof data.licenses && Object.keys(data.licenses).length; | ||
return !!this.get(data); | ||
}, | ||
/** | ||
* Retrieve the possible locations of the license information. | ||
* | ||
* @param {Object} data The package.json or npm package contents. | ||
* @returns {Array} | ||
* @api private | ||
*/ | ||
get: function get(data) { | ||
var parser = this | ||
, matches = []; | ||
if ('string' === typeof data.license) { | ||
return [data.license]; | ||
} | ||
if ('string' === typeof data.licenses) { | ||
return [data.licenses]; | ||
} | ||
if (Array.isArray(data.licenses)) { | ||
Array.prototype.push.apply( | ||
matches, | ||
data.licenses.map(function filter(item) { | ||
return parser.license(item); | ||
}).filter(Boolean) | ||
); | ||
} | ||
if ('object' === typeof data.licenses && Object.keys(data.licenses).length) { | ||
Array.prototype.push.apply( | ||
matches, | ||
Object.keys(data.licenses).map(function map(key) { | ||
if (!parser.license(data.licenses[key])) return undefined; | ||
return data.licenses[key]; | ||
}).filter(Boolean) | ||
); | ||
} | ||
if (matches.length) return matches; | ||
} | ||
}); |
{ | ||
"name": "licenses", | ||
"version": "0.0.0", | ||
"version": "0.0.1", | ||
"description": "A small tool that detects licensing information for a given Node.js module", | ||
"main": "index.js", | ||
"scripts": { | ||
"test": "echo \"Error: no test specified\" && exit 1" | ||
"test": "mocha $(find test -name '*.test.js')" | ||
}, | ||
@@ -16,4 +16,12 @@ "keywords": [ | ||
"dependencies": { | ||
"fusing": "0.0.0" | ||
"fusing": "0.0.x", | ||
"async": "0.2.x", | ||
"request": "2.33.x", | ||
"debug": "0.7.x" | ||
}, | ||
"devDependencies": { | ||
"mocha": "1.17.x", | ||
"chai": "1.9.x", | ||
"pre-commit": "0.0.x" | ||
} | ||
} |
243
parser.js
'use strict'; | ||
var request = require('request') | ||
, fuse = require('fusing'); | ||
var debug = require('debug')('licenses::parser') | ||
, normalized = require('./normalize') | ||
, fuse = require('fusing') | ||
, fs = require('fs'); | ||
function Parser() { } | ||
/**
 * Base class for every parser. It stores a reference to the collection of
 * available parsers on the instance and can be called with or without the
 * `new` keyword.
 *
 * @constructor
 * @param {Object} parsers An object which contains all available parsers.
 * @api public
 */
function Parser(parsers) {
  if (this instanceof Parser) {
    this.parsers = parsers;
    return;
  }

  return new Parser(parsers);
}
fuse(Parser); | ||
Parser.readable('request', request); | ||
/** | ||
* Expose some core modules through the instance. | ||
* | ||
* @type {Function} | ||
* @api public | ||
*/ | ||
Parser.readable('async', require('async')); | ||
Parser.readable('request', require('request')); | ||
/** | ||
* Simple regular expression based tests for figuring out which license we're | ||
* dealing with. | ||
* | ||
* @param {String} str | ||
* @returns {String} | ||
* @api private | ||
* @returns {Array} | ||
* @api public | ||
*/ | ||
Parser.readable('test', function test(str) { | ||
if (/\sBSD\s/.test(str)) return 'BSD'; | ||
if (/\sGPL\s/.test(str) || /\sGPLv2\s/.test(str)) return 'GPL'; | ||
if (/\sLGPL\s/.test(str)) return 'LGPL'; | ||
if (/\sMIT\s/.test(str) || /\s\(MIT\)\s/.test(str)) return 'MIT'; | ||
if (/\sApache\sLicen[cs]e\s/.test(str)) return 'Apache'; | ||
if (/\sMPL\s/.test(str)) return 'MPL'; | ||
if (/BSD/.test(str)) return ['BSD']; | ||
if (/GPL/.test(str) || /GPLv2/.test(str)) return ['GPL']; | ||
if (/LGPL/.test(str)) return ['LGPL']; | ||
if (/MIT/.test(str) || /\(MIT\)/.test(str)) return ['MIT']; | ||
if (/Apache\s?Licen[cs]e/.test(str)) return ['Apache']; | ||
if (/MPL/.test(str)) return ['MPL']; | ||
@@ -29,8 +52,47 @@ // | ||
// | ||
if (/\sDO\sWHAT\sTHE\sFUCK\sYOU\sWANT\sTO\sPUBLIC\sLICEN[CS]E\s/i.test(str) | ||
|| /\sWTFPL\s/.test(str) | ||
) return 'WTFPL'; | ||
if (/DO\sWHAT\sTHE\sFUCK\sYOU\sWANT\sTO\sPUBLIC\sLICEN[CS]E/i.test(str) | ||
|| /WTFPL/.test(str) | ||
) return ['WTFPL']; | ||
}); | ||
/**
 * There are 1000 ways of writing that you're using an MIT module. This
 * normalization module attempts to normalize the licenses in to one common
 * name.
 *
 * @param {Array} data A list of license information that needs to be normalized.
 * @returns {Array} The normalized names, falsy input is returned untouched.
 * @api public
 */
Parser.readable('normalize', function normalize(data) {
  if (!data) return data;

  //
  // Only own properties count as a match. A plain `in` check also finds
  // inherited properties, so a license called "constructor" or "toString"
  // would "normalize" to a function.
  //
  var has = Object.prototype.hasOwnProperty;

  //
  // First we need to pass the data through our dual license checker so can
  // figure out if the module is dual licensed as both license values needs to
  // be normalized.
  //
  return this.dual(data).map(function map(license) {
    //
    // 1. Direct match. Check for direct matches against our normalized license
    //    file.
    //
    if (has.call(normalized, license)) {
      debug('normalized %s to %s using the "direct match" method', license, normalized[license]);
      return normalized[license];
    }

    //
    // 2. toUpperCase. Transform the given license string to uppercase and see
    //    if that matches a key of the normalization file.
    //
    var transformed = license.toUpperCase();
    if (has.call(normalized, transformed)) {
      debug('normalized %s to %s using the "transform" method', license, normalized[transformed]);
      return normalized[transformed];
    }

    return license;
  });
});
/** | ||
* Find an URL in the data structure. | ||
@@ -54,2 +116,153 @@ * | ||
/**
 * Check for potential dual licensing in the given license arrays. Most people
 * specify them in their package.json as : MIT/GPL because the `npm init`
 * doesn't really allow dual licensing.
 *
 * It supports the following possibilities:
 *
 * - MIT/GPL
 * - MIT and GPL
 * - MIT or GPL
 * - MIT and/or GPL
 * - MIT, GPL
 *
 * The words `and` and `or` only separate licenses when they are surrounded by
 * whitespace, so names such as `GPL-3.0-or-later` are left intact. Entries
 * that are empty or not a string are ignored, duplicates are removed.
 *
 * @param {Array} licenses A list of license strings, a single string is accepted too.
 * @returns {Array} licenses
 * @api public
 */
Parser.readable('dual', function dual(licenses) {
  if (!licenses) return [];
  if (!Array.isArray(licenses)) licenses = [licenses];

  return licenses.reduce(function reduce(found, license) {
    license = 'string' === typeof license ? license.trim() : '';

    //
    // Always hand the accumulator back. Returning nothing here would turn it
    // in to `undefined` for the next entry.
    //
    if (!license) return found;

    Array.prototype.push.apply(
      found,
      license.split(/\s+(?:and\/or|and|or)\s+|\s*[\/,]\s*/)
    );

    return found;
  }, []).filter(function duplicate(item, index, all) {
    if (!item) return false;

    return all.indexOf(item) === index;
  });
});
/**
 * Reduce a string to its lowercased word characters.
 *
 * Without an `amount` all words are glued together in to one single string.
 * With an `amount` the words are glued together in groups of that many words
 * and the groups are returned as an array, the last group holds whatever
 * words were left.
 *
 * @param {String} str The string that needs to be tokenized.
 * @param {Number} amount The number of words per group.
 * @returns {String|Array}
 */
Parser.readable('tokenizer', function tokenizer(str, amount) {
  var tokens = str.toLowerCase().split(/\W+/g).filter(Boolean);

  if (!amount) return tokens.join('');

  var groups = []
    , pending = []
    , last = tokens.length - 1;

  tokens.forEach(function each(word, index) {
    pending.push(word);

    //
    // Keep collecting until the group is full or we've ran out of words, then
    // concat the collected words in to a single string to improve matching.
    //
    if (pending.length !== amount && index !== last) return;

    groups.push(pending.join(''));
    pending = [];
  });

  return groups;
});
/**
 * Scan the given string for occurrences of the license text. If the given
 * percentage of matching lines is reached, we'll assume a match. When multiple
 * licenses reach the percentage, the one with the highest score wins.
 *
 * @param {String} str The string that needs to have licence matching.
 * @param {Number} percentage Percentage for accepted match, defaults to 80.
 * @returns {Array} License name if we have a match, undefined otherwise.
 * @api public
 */
Parser.readable('scan', function scan(str, percentage) {
  percentage = percentage || 80;
  str = this.tokenizer(str);

  var matches = []
    , match;

  this.licenses.forEach(function each(license) {
    var test = {
      total: license.file.length,
      license: license.as,
      percentage: 0,
      matches: 0
    };

    license.file.forEach(function each(line) {
      if (str.indexOf(line) !== -1) test.matches++;
    });

    //
    // A license without any tokens can never match, don't divide by zero.
    //
    test.percentage = test.total ? test.matches / test.total * 100 : 0;
    if (test.percentage >= percentage) matches.push(test);

    debug('had a %s% match for %s', test.percentage, test.license);
  });

  //
  // Highest percentage first. The comparator has to return a number, a
  // boolean can never signal "a goes before b" so the order would be
  // unreliable.
  //
  match = matches.sort(function sort(a, b) {
    return b.percentage - a.percentage;
  })[0];

  if (match) return [match.license];
});
/**
 * The contents of various of license types that we can use for comparison.
 * Every entry maps a file in the `licenses` folder to the name that is reported
 * on a match. The files are read synchronously when this module is loaded and
 * `file` is replaced with the tokenized content, in groups of 5 words.
 *
 * @type {Array}
 * @api private
 */
Parser.readable('licenses', [
  { file: 'AFL2.1.txt',       as: 'AFL 2.1'       },
  { file: 'AFL3.0.txt',       as: 'AFL 3.0'       },
  { file: 'AGPL3.0.txt',      as: 'AGPL 3.0'      },
  { file: 'APL-1.0.txt',      as: 'APL 1.0'       },
  { file: 'Apache2.0.txt',    as: 'Apache 2.0'    },
  { file: 'Artistic2.0.txt',  as: 'Artistic 2.0'  },
  { file: 'BSD-2-Clause.txt', as: 'BSD 2-Clause'  },
  { file: 'BSD-3-Clause.txt', as: 'BSD 3-Clause'  },
  { file: 'BSD.txt',          as: 'BSD 4-Clause'  },
  { file: 'BSL1.0.txt',       as: 'BSL 1.0'       },
  { file: 'EPL-1.0.txt',      as: 'EPL 1.0'       },
  { file: 'GPL-2.0.txt',      as: 'GPL 2.0'       },
  { file: 'GPL-3.0.txt',      as: 'GPL 3.0'       },
  { file: 'ISC.txt',          as: 'ISC'           },
  { file: 'LGPL-2.1.txt',     as: 'LGPL 2.1'      },
  { file: 'LGPL-3.0.txt',     as: 'LGPL 3.0'      },
  { file: 'MIT.txt',          as: 'MIT'           },
  { file: 'MPL-1.0.txt',      as: 'MPL'           },
  { file: 'MPL-2.0.txt',      as: 'MPL 2.0'       },
  { file: 'Python2.txt',      as: 'Python 2.0'    },
  { file: 'UNLICENSE.txt',    as: 'UNLICENSE'     },
  { file: 'WTFPL.txt',        as: 'WTFPL'         },
  { file: 'beerware.txt',     as: 'Beerware'      },
  { file: 'cddl1.txt',        as: 'CDDL 1.0'      },
  { file: 'nasa.txt',         as: 'NASA 1.3'      },
  { file: 'zlib.txt',         as: 'zlib/libpng'   }
].map(function map(license) {
  //
  // The function is bound to the prototype below so `this.tokenizer` is
  // available without an instance.
  //
  license.file = this.tokenizer(
    fs.readFileSync(__dirname +'/licenses/'+ license.file, 'utf-8'),
    5
  );

  return license;
}.bind(Parser.prototype)));
// | ||
@@ -56,0 +269,0 @@ // Expose the parser. |
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Copyleft License
License(Experimental) Copyleft license information was found.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
Mixed license
License(Experimental) Package contains multiple licenses.
Found 1 instance in 1 package
Non-permissive License
License(Experimental) A license not known to be considered permissive was found.
Found 1 instance in 1 package
Unidentified License
License(Experimental) Something that seems like a license was found, but its contents could not be matched with a known license.
Found 1 instance in 1 package
No README
QualityPackage does not have a README. This may indicate a failed publish or a low quality package.
Found 1 instance in 1 package
No tests
QualityPackage does not have any tests. This is a strong signal of a poorly maintained or low quality package.
Found 1 instance in 1 package
344506
39
1140
2
0
73
4
3
4
70
2
1
+ Addedasync@0.2.x
+ Addeddebug@0.7.x
+ Addedrequest@2.33.x
+ Addedasn1@0.1.11(transitive)
+ Addedassert-plus@0.1.5(transitive)
+ Addedasync@0.2.100.9.2(transitive)
+ Addedaws-sign2@0.5.0(transitive)
+ Addedboom@0.4.2(transitive)
+ Addedcombined-stream@0.0.7(transitive)
+ Addedcryptiles@0.2.2(transitive)
+ Addedctype@0.5.3(transitive)
+ Addeddebug@0.7.4(transitive)
+ Addeddelayed-stream@0.0.5(transitive)
+ Addedforever-agent@0.5.2(transitive)
+ Addedform-data@0.1.4(transitive)
+ Addedfusing@0.0.3(transitive)
+ Addedhawk@1.0.0(transitive)
+ Addedhoek@0.9.1(transitive)
+ Addedhttp-signature@0.10.1(transitive)
+ Addedjson-stringify-safe@5.0.1(transitive)
+ Addedmime@1.2.11(transitive)
+ Addednode-uuid@1.4.8(transitive)
+ Addedoauth-sign@0.3.0(transitive)
+ Addedqs@0.6.6(transitive)
+ Addedrequest@2.33.0(transitive)
+ Addedsntp@0.2.4(transitive)
+ Addedtldts@6.1.77(transitive)
+ Addedtldts-core@6.1.77(transitive)
+ Addedtough-cookie@5.1.1(transitive)
+ Addedtunnel-agent@0.3.0(transitive)
- Removedfusing@0.0.0(transitive)
Updatedfusing@0.0.x