lang-detector
Advanced tools
Comparing version 1.0.1 to 1.0.2
106
index.js
@@ -27,16 +27,7 @@ /** | ||
/**
 * Adds up the points of every checker whose pattern matches a line of code.
 *
 * @param {string} language - Name of the language being scored. It is not
 *   used by the computation itself.
 * @param {string} lineOfCode - A single line of the snippet under test.
 * @param {Array<{pattern: RegExp, points: number}>} checkers - Checkers to
 *   run against the line. A missing list counts as empty.
 * @returns {number} Sum of `points` over all matching checkers (0 if none).
 */
function getPoints(language, lineOfCode, checkers) {
  var list = checkers || [];
  var total = 0;
  for (var i = 0; i < list.length; i++) {
    var checker = list[i];
    // A pattern with the `g` (or `y`) flag keeps `lastIndex` between
    // `test()` calls. The same checker objects are reused for every line,
    // so rewind first; otherwise a match on one line can make the next
    // line start searching past its own end and be missed.
    checker.pattern.lastIndex = 0;
    if (checker.pattern.test(lineOfCode)) {
      total += checker.points;
    }
  }
  return total;
}
/** | ||
* A checker is an object with the following form: | ||
* { pattern: /something/, points: 1 } | ||
* or if the pattern only matches code near the top of a given file: | ||
* { pattern: /something/, points: 2, nearTop: true } | ||
* | ||
@@ -50,3 +41,3 @@ * Key: Language name. | ||
* Points scale: | ||
* 2 = Bonus point: Almost unique to a given language. | ||
* 2 = Bonus points: Almost unique to a given language. | ||
* 1 = Regular point: Not unique to a given language. | ||
@@ -62,3 +53,3 @@ * -1 = Penalty point: Does not match a given language. | ||
// console.log('ayy lmao') | ||
{ pattern: /console\.log( )*\(/g, points: 2 }, | ||
{ pattern: /console\.log( )*\(/, points: 2 }, | ||
// === operator | ||
@@ -88,3 +79,3 @@ { pattern: /===/g, points: 2 }, | ||
// #include <whatever.h> | ||
{ pattern: /#include (<|")\w+\.h(>|")/, points: 2 }, | ||
{ pattern: /#include (<|")\w+\.h(>|")/, points: 2, nearTop: true }, | ||
// pointer | ||
@@ -96,2 +87,4 @@ { pattern: /(\w+)( )*\*( )*\w+/, points: 2 }, | ||
{ pattern: /(\w+)( )+\w+\[.+\]/, points: 1 }, | ||
// #define macro | ||
{ pattern: /#define( )+.+/, points: 1 }, | ||
// NULL constant | ||
@@ -117,6 +110,6 @@ { pattern: /NULL/, points: 1 }, | ||
// #include <whatever.h> | ||
{ pattern: /#include( )*(<|")\w+(\.h)?(>|")/, points: 2 }, | ||
{ pattern: /#include( )*(<|")\w+(\.h)?(>|")/, points: 2, nearTop: true }, | ||
// using namespace something | ||
{ pattern: /using( )*namespace( )*.+( )*;/, points: 2 }, | ||
// template | ||
{ pattern: /using( )*namespace( )*.+( )*;/, points: 2, nearTop: true }, | ||
// template declaration | ||
{ pattern: /template( )*<.*>/, points: 2 }, | ||
@@ -129,6 +122,12 @@ // std | ||
{ pattern: /(public|protected|private):/, points: 2 }, | ||
// new Keyword | ||
{ pattern: /new \w+(\(.*\))?/, points: 2 }, | ||
// nullptr | ||
{ pattern: /nullptr/, points: 2 }, | ||
// new Keyword | ||
{ pattern: /new \w+(\(.*\))?/, points: 1 }, | ||
// #define macro | ||
{ pattern: /#define( )+.+/, points: 1 }, | ||
// template usage | ||
{ pattern: /\w+<\w+>/, points: 1 }, | ||
// class keyword | ||
{ pattern: /class( )+\w+/, points: 1 }, | ||
// void keyword | ||
@@ -150,3 +149,3 @@ { pattern: /void/g, points: 1 }, | ||
// Function definition | ||
{ pattern: /def( )+\w+( )*:/, points: 2 }, | ||
{ pattern: /def( )+\w+\(.*\)( )*:/, points: 2 }, | ||
// while loop | ||
@@ -165,7 +164,7 @@ { pattern: /while (.+):/, points: 2 }, | ||
// for loop | ||
{ pattern: /for (\w+|\(?\w+,( )*\w+\)?) in (.+):?/, points: 1 }, | ||
{ pattern: /for (\w+|\(?\w+,( )*\w+\)?) in (.+):/, points: 2 }, | ||
// Python variable declaration. | ||
{ pattern: /\w+( )*=( )*[\w]+/, points: 1 }, | ||
{ pattern: /\w+( )*=( )*\w+(?!;)(\n|$)/, points: 1 }, | ||
// import something | ||
{ pattern: /import ([[^\.]\w])+/, points: 1 }, | ||
{ pattern: /import ([[^\.]\w])+/, points: 1, nearTop: true }, | ||
// print statement/function | ||
@@ -179,3 +178,3 @@ { pattern: /print((( )*\(.+\))|( )+.+)/, points: 1 }, | ||
// System.out.println() etc. | ||
{ pattern: /System\.(in|out)\./, points: 2 }, | ||
{ pattern: /System\.(in|out)\.\w+/, points: 2 }, | ||
// Class variable declarations | ||
@@ -198,7 +197,7 @@ { pattern: /(private|protected|public)( )*\w+( )*\w+(( )*=( )*[\w])?/, points: 2 }, | ||
// new Keyword (Java) | ||
{ pattern: /new \w+( )*\(.+\)/, points: 2 }, | ||
{ pattern: /new [A-Z]\w*( )*\(.+\)/, points: 2 }, | ||
// C style variable declaration. | ||
{ pattern: /(^|\s)(char|long|int|float|double)( )+[\w]+( )*=?/, points: 1 }, | ||
// extends/implements keywords | ||
{ pattern: /(extends|implements)/, points: 1 }, | ||
{ pattern: /(extends|implements)/, points: 2, nearTop: true }, | ||
// null keyword | ||
@@ -219,6 +218,7 @@ { pattern: /null/g, points: 1 }, | ||
// C style include | ||
{ pattern: /#include( )*(<|")\w+(\.h)?(>|")/, points: -1 }, | ||
{ pattern: /#include( )*(<|")\w+(\.h)?(>|")/, points: -1, nearTop: true }, | ||
], | ||
'HTML': [ | ||
{ pattern: /<!DOCTYPE (html|HTML PUBLIC .+)>/, points: 2, nearTop: true }, | ||
// Tags | ||
@@ -232,3 +232,5 @@ { pattern: /<[a-z0-9]+(( )*[\w]+=('|").+('|")( )*)?>.*<\/[a-z0-9]+>/g, points: 2 }, | ||
// Properties | ||
{ pattern: /[a-z\-]+:.+;/, points: 2 }, | ||
{ pattern: /[a-z\-]+:(?!:).+;/, points: 2 }, | ||
// <style> tag from HTML (This is a rare case where a lot of penalty points are needed.) | ||
{ pattern: /<(\/)?style>/, points: -50 }, | ||
], | ||
@@ -238,3 +240,3 @@ | ||
// require/include | ||
{ pattern: /(require|include)( )*'\w+(\.rb)?'/, points: 2 }, | ||
{ pattern: /(require|include)( )*'\w+(\.rb)?'/, points: 2, nearTop: true }, | ||
// Function definition | ||
@@ -247,3 +249,3 @@ { pattern: /def( )+\w+( )*(\(.+\))?( )*\n/, points: 2 }, | ||
// puts (Ruby print) | ||
{ pattern: /puts( )*("|').+("|')/, points: 2 }, | ||
{ pattern: /puts( )+("|').+("|')/, points: 2 }, | ||
// Inheriting class | ||
@@ -268,2 +270,6 @@ { pattern: /class [A-Z]\w*( )*<( )*([A-Z]\w*(::)?)+/, points: 2 }, | ||
'Go': [ | ||
// package something | ||
{ pattern: /package( )+[a-z]+\n/, points: 2, nearTop: true }, | ||
// import | ||
{ pattern: /(import( )*\(( )*\n)|(import( )+"[a-z0-9\/\.]+")/, points: 2, nearTop: true }, | ||
// error check | ||
@@ -276,3 +282,3 @@ { pattern: /if.+err( )*!=( )*nil.+{/, points: 2 }, | ||
// variable initialisation | ||
{ pattern: /\w+( )*:=( )*.+[;\n]/, points: 2 }, | ||
{ pattern: /\w+( )*:=( )*.+[^;\n]/, points: 2 }, | ||
// if/else if | ||
@@ -293,2 +299,13 @@ { pattern: /(} else )?if.+{/, points: 2 }, | ||
/**
 * Adds up the points of every checker whose pattern matches a line of code.
 *
 * @param {string} language - Name of the language being scored. It is not
 *   used by the computation itself.
 * @param {string} lineOfCode - A single line of the snippet under test.
 * @param {Array<{pattern: RegExp, points: number}>} checkers - Checkers to
 *   run against the line. A missing list counts as empty.
 * @returns {number} Sum of `points` over all matching checkers (0 if none).
 */
function getPoints(language, lineOfCode, checkers) {
  var list = checkers || [];
  var total = 0;
  for (var i = 0; i < list.length; i++) {
    var checker = list[i];
    // A pattern with the `g` (or `y`) flag keeps `lastIndex` between
    // `test()` calls. The same checker objects are reused for every line,
    // so rewind first; otherwise a match on one line can make the next
    // line start searching past its own end and be missed.
    checker.pattern.lastIndex = 0;
    if (checker.pattern.test(lineOfCode)) {
      total += checker.points;
    }
  }
  return total;
}
function detectLang(snippet, options) { | ||
@@ -305,5 +322,12 @@ var opts = _.defaults(options || {}, { | ||
if (opts.heuristic && linesOfCode.length > 1000) { | ||
// Tells whether the line at `index` counts as being near the top of the
// snippet: every line qualifies when there are at most 10 lines, otherwise
// only indexes below one tenth of the current line count do.
function nearTop(index) {
  var totalLines = linesOfCode.length;
  return totalLines <= 10 || index < totalLines / 10;
}
if (opts.heuristic && linesOfCode.length >= 500) { | ||
linesOfCode = linesOfCode.filter(function(lineOfCode, index) { | ||
if (index <= linesOfCode.length / 10) { | ||
if (nearTop(index)) { | ||
return true; | ||
@@ -327,7 +351,15 @@ } | ||
var points = _.reduce(_.map(linesOfCode, function(lineOfCode) { | ||
return getPoints(language, lineOfCode, checkers); | ||
}), function(memo, num) { | ||
var pointsList = linesOfCode.map(function(lineOfCode, index) { | ||
if (!nearTop(index)) { | ||
return getPoints(language, lineOfCode, _.reject(checkers, function(checker) { | ||
return checker.nearTop; | ||
})); | ||
} else { | ||
return getPoints(language, lineOfCode, checkers); | ||
} | ||
}); | ||
var points = _.reduce(pointsList, function(memo, num) { | ||
return memo + num; | ||
}, 0); | ||
}); | ||
@@ -334,0 +366,0 @@ return { language: language, points: points }; |
{ | ||
"name": "lang-detector", | ||
"version": "1.0.1", | ||
"version": "1.0.2", | ||
"description": "A library for detecting the programming language of a code snippet.", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
482391
10245