Comparing version 0.2.0 to 0.3.0
13
app.js
var app = require('connect')(); | ||
var createRepostGuard = require('./src/repost-guard'); | ||
var fs = require('fs'); | ||
var log = require('./src/util').log; | ||
var path = require('path'); | ||
// A single repost guard is created here and attached to the factory as
// `createRepostGuard.shared`. It is kept in memory forever, for now.
var sharedGuard = createRepostGuard({
  // Store files live in `tmp`, next to this script.
  directory: path.join(__dirname, 'tmp'),
  // ~350 links * 14 days
  lineLimit: 5000,
  // Number of most recent links discounted for being on current page.
  feedPageSize: 30,
  sync: false,
  onReady: function() {
    log('Links loaded.');
  }
});
createRepostGuard.shared = sharedGuard;
sharedGuard.setUp();
require('./config').feeds.forEach(function(feed) { | ||
@@ -6,0 +19,0 @@ var middleware = require(path.join(__dirname, 'src/feeds', feed.name)); |
{ | ||
"feeds": [ | ||
{ | ||
"name": "gama-sutra", | ||
"filters": [ | ||
{ | ||
"name": "uninteresting-topics", | ||
"type": "blacklist", | ||
"tokens": [ | ||
"Get a job", "Sponsored", "Pokémon" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"name": "hacker-news", | ||
@@ -14,4 +27,5 @@ | ||
"#C", "\\.NET", "Angular", "BEM", "DuckDuckGo", "Java", "Kotlin", | ||
"Lua", "MatLab", "OCAML", "Perl", "R", "Raspberry Pi", "Rust", | ||
"Surface Pro", "Xamarin" | ||
"Lua", "MatLab", "OCaml", "Perl", "R", "Raspberry\\s?Pi", "Rust", | ||
"Surface Pro", "Xamarin", "Vim", "Emacs", "Bitcoin", "LaTeX", | ||
"TypeScript", "C\\+\\+", "TPP" | ||
] | ||
@@ -23,8 +37,84 @@ }, | ||
"tokens": [ | ||
"Trump", "Bernie", "Hillary" | ||
"Trump", "Bernie", "Hillary", "San Bernardino", "Theranos" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"name": "nyt-business", | ||
"filters": [ | ||
{ | ||
"name": "uninteresting-topics", | ||
"type": "blacklist", | ||
"tokens": [ | ||
"Tech Tip", "The Week Ahead", "Farhad and Mike" | ||
] | ||
}, | ||
{ | ||
"name": "tired-topics", | ||
"type": "blacklist", | ||
"tokens": [ | ||
"Trump", "Bernie", "Hillary", "Cosby" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"name": "quartz", | ||
"filters": [ | ||
{ | ||
"name": "uninteresting-topics", | ||
"type": "blacklist", | ||
"tokens": [ | ||
"Jenner", "Indian?s?", "Modi", "Mallya", "Jaitley", | ||
"Quartz Daily Brief", "Quartz Weekend Brief" | ||
] | ||
}, | ||
{ | ||
"name": "tired-topics", | ||
"type": "blacklist", | ||
"tokens": [ | ||
"Trump", "Bernie", "Hillary", "San Bernardino", "Brexit", | ||
"Sharapova", "hoverboard" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"name": "ray-wenderlich", | ||
"filters": [ | ||
{ | ||
"name": "uninteresting-topics", | ||
"type": "blacklist", | ||
"tokens": [ | ||
"Video Tutorial", "Open Call For Applications" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"name": "yahoo-tech", | ||
"filters": [ | ||
{ | ||
"name": "uninteresting-topics", | ||
"type": "blacklist", | ||
"tokens": [ | ||
"iPhone", "Playstation", "Raspberry\\s?Pi", "Xbox", | ||
"Deal of the Day" | ||
] | ||
}, | ||
{ | ||
"name": "tired-topics", | ||
"type": "blacklist", | ||
"tokens": [ | ||
"Trump", "Bernie", "Hillary", "San Bernardino", "hoverboard" | ||
] | ||
} | ||
] | ||
} | ||
] | ||
} |
{ | ||
"name": "custom-rss", | ||
"version": "0.2.0", | ||
"version": "0.3.0", | ||
"description": "Filtering RSS because Zapier is too expensive.", | ||
@@ -5,0 +5,0 @@ "main": "src/feeds/index.js", |
@@ -10,5 +10,8 @@ # Custom RSS | ||
### Features | ||
- [x] Blacklist filtering | ||
- [x] Repost guarding | ||
- [x] Skipped entry logging | ||
- [x] No database required | ||
- [x] Uses JSON for configuration | ||
@@ -18,1 +21,22 @@ - [x] Feeds as middleware functions | ||
- [x] ES5, so compatible with older Node versions | ||
### Usage | ||
The sample app lives in the root directory. It's a bare-bones Connect app, with personal configuration in `config.json`. This project is primarily for personal use, but what's in `/src` should be reusable with any Connect-like web framework. | ||
### Feeds | ||
- [x] Hacker News (via [hnapp](http://hnapp.com)) | ||
- [x] Quartz | ||
- [x] NYTimes Business | ||
- [x] Yahoo Tech | ||
- [x] Ray Wenderlich | ||
- [x] Gamasutra | ||
--- | ||
### Implementation | ||
Some shared hosting providers, including mine, refuse to have npm installed on their systems, so dependencies need to be few to none, unless they're small enough to version. No XML parser or writer is used; a much lighter hand-rolled transformer does basic regex parsing. No MySQL client is used; data is stored with limits in plain files and manipulated in buffers (memory). No logger or mailer is used for feedback; custom loggers are hand-rolled as needed, with utilities on top of `fs`. The test runner is hand-rolled (only because not a lot is required). Connect is the only dependency (but not really; see Usage). This core constraint also yields the opportunity to learn Node fundamentals. | ||
![custom-rss](https://cloud.githubusercontent.com/assets/100884/13690283/08d7c958-e6e4-11e5-9a83-dacfb7dc7d2f.png) |
@@ -12,18 +12,2 @@ var fetchFeed = require('../fetch-feed'); | ||
function shouldSkipEntry(entry, filters, repostGuard) { | ||
var title = entry.find('title'); | ||
var skip = filters.reduce(function(skip, filter) { | ||
return skip || filter.pattern.test(title); | ||
}, false); | ||
if (skip) { skip = 'blocked'; } | ||
var link = entry.find('link', 'href'); | ||
if (skip === false) { | ||
skip = !repostGuard.checkLink(link); | ||
if (skip) { skip = 'repost'; } | ||
} | ||
return skip; | ||
} | ||
function transformMeta(root) { | ||
@@ -54,6 +38,4 @@ root.transformContent('title', { to: 'Hacker News (filtered)' }); | ||
findLink: function(entry) { return entry.find('link', 'href'); }, | ||
findTitle: function(entry) { return entry.find('title'); }, | ||
shouldSkipEntry: shouldSkipEntry, | ||
transformEntry: transformTitle, | ||
transformMeta: transformMeta, | ||
transformEntry: transformTitle, | ||
verbose: true, | ||
@@ -60,0 +42,0 @@ onDone: function(data) { |
module.exports = { | ||
hackerNews: require('./hacker-news') | ||
gamaSutra: require('./gama-sutra'), | ||
hackerNews: require('./hacker-news'), | ||
nytBusiness: require('./nyt-business'), | ||
quartz: require('./quartz'), | ||
rayWenderlich: require('./ray-wenderlich'), | ||
yahooTech: require('./yahoo-tech') | ||
}; |
@@ -1,9 +0,8 @@ | ||
var http = require('http'); | ||
var log = require('./util').log; | ||
var util = require('./util'); | ||
module.exports = function fetchFeed(delegate) { | ||
var request = http.request(delegate.url); | ||
var request = util.request(delegate.url); | ||
request.on('error', function(e) { | ||
log(e.message); | ||
util.log(e.message); | ||
delegate.onError(e); | ||
@@ -13,4 +12,4 @@ }); | ||
request.on('response', function(response) { | ||
log('status', response.statusCode); | ||
log('headers', JSON.stringify(response.headers)); | ||
util.log('status', response.statusCode); | ||
util.log('headers', JSON.stringify(response.headers)); | ||
@@ -22,3 +21,3 @@ var data = ''; | ||
}).on('end', function() { | ||
if (delegate.verbose) { log('data', data); } | ||
if (delegate.verbose) { util.log('data', data); } | ||
delegate.onResponse(response, data); | ||
@@ -25,0 +24,0 @@ }); |
@@ -18,12 +18,50 @@ var createEntryLogger = require('./entry-logger'); | ||
// Fallback lookups for feeds whose delegate does not supply its own finders.
// Each finder calls `find` on the node it is given with a fixed tag name.
var defaultFinders = (function() {
  function byTag(tagName) {
    return function(node) { return node.find(tagName); };
  }
  return {
    entry: byTag('entry'),
    link: byTag('link'),
    title: byTag('title')
  };
})();
/**
 * Default policy for deciding whether a feed entry is dropped.
 *
 * @param {Object} entry - Entry node handed to the finders.
 * @param {Object} finders - Must provide `title(entry)` and `link(entry)`.
 * @param {Array} filters - Objects whose `pattern.test(title)` is consulted.
 * @param {Object} [repostGuard] - Optional; its `checkLink(link)` result is
 *   negated, so a falsy result marks the entry as a repost.
 * @returns {(string|boolean)} `'blocked'` when any filter pattern matches the
 *   title, `'repost'` when the guard rejects the link, otherwise `false`.
 */
function defaultShouldSkipEntry(entry, finders, filters, repostGuard) {
  var title = finders.title(entry);
  var isBlocked = filters.some(function(filter) {
    return filter.pattern.test(title);
  });

  // The link is looked up unconditionally, even when the entry is already
  // blocked, so the sequence of finder calls does not depend on the outcome.
  var link = finders.link(entry);

  if (isBlocked) {
    return 'blocked';
  }
  // The guard is only consulted for entries that passed every filter.
  if (repostGuard && !repostGuard.checkLink(link)) {
    return 'repost';
  }
  return false;
}
function filterFeed(delegate) { | ||
var filters = createFilters(delegate.config.filters); | ||
var root = createXMLTransformer({ string: delegate.data, verbose: delegate.verbose }); | ||
delegate.transformMeta(root); | ||
var root = createXMLTransformer({ | ||
string: delegate.data, verbose: delegate.verbose | ||
}); | ||
if (delegate.transformMeta) { | ||
delegate.transformMeta(root); | ||
} | ||
var finders = { | ||
entry: delegate.findEntry || defaultFinders.entry, | ||
id: delegate.findId, | ||
link: delegate.findLink || defaultFinders.link, | ||
title: delegate.findTitle || defaultFinders.title | ||
}; | ||
var guard; | ||
if (delegate.guardReposts !== false) { | ||
guard = createRepostGuard.shared; | ||
} | ||
var shouldSkipEntry = delegate.shouldSkipEntry || defaultShouldSkipEntry; | ||
var entry, skip; | ||
while ((entry = root.find('entry'))) { | ||
while ((entry = finders.entry(root))) { | ||
if ((skip = delegate.shouldSkipEntry(entry, filters, delegate.guard)) && | ||
if ((skip = shouldSkipEntry(entry, finders, filters, guard)) && | ||
skip !== false) | ||
@@ -33,8 +71,10 @@ { | ||
delegate.logger.logEntry({ | ||
id: delegate.findId(entry), | ||
title: delegate.findTitle(entry) +' ('+ skip +')' | ||
id: finders.id(entry), | ||
title: finders.title(entry) +' ('+ skip +')' | ||
}); | ||
} else { | ||
delegate.transformEntry(entry); | ||
if (delegate.transformEntry) { | ||
delegate.transformEntry(entry); | ||
} | ||
root.next(); | ||
@@ -48,12 +88,6 @@ } | ||
module.exports = function(delegate) { | ||
// Wait for guard, logger. | ||
var deferredFilterFeed = util.callOn(2, filterFeed.bind(null, delegate)); | ||
delegate.guard = createRepostGuard({ | ||
directory: directory, | ||
lineLimit: 5000, // ~350 links * 14 days | ||
// Number of most recent links discounted for being on current page. | ||
feedPageSize: 30, | ||
sync: false, | ||
onReady: deferredFilterFeed | ||
}); | ||
// Remove any XML stylesheets; we won't be serving them. | ||
delegate.data = delegate.data.replace(/<\?xml-stylesheet[^]+?\?>\s*/g, ''); | ||
// Wait for logger. | ||
delegate.logger = createEntryLogger({ | ||
@@ -64,3 +98,3 @@ directory: directory, | ||
sync: false, | ||
onReady: deferredFilterFeed | ||
onReady: filterFeed.bind(null, delegate) | ||
}); | ||
@@ -72,3 +106,5 @@ | ||
onDone.apply(delegate, arguments); | ||
delegate.guard.tearDown(); | ||
createRepostGuard.shared.persistLinks(function() { | ||
util.log('Links persisted.'); | ||
}); | ||
delegate.logger.tearDown(); | ||
@@ -78,4 +114,3 @@ }; | ||
// Start. | ||
delegate.guard.setUp(); | ||
delegate.logger.setUp(); | ||
}; |
@@ -17,3 +17,23 @@ var path = require('path'); | ||
persistLinks: function(callback) { | ||
if (!this.dataChanged || this.isPersisting) { | ||
if (callback) { callback(); } | ||
return false; | ||
} | ||
this.isPersisting = true; | ||
util.writeFile({ | ||
data: this.data, | ||
file: this.storeFile(), | ||
sync: delegate.sync, | ||
onDone: function() { | ||
this.isPersisting = false; | ||
if (callback) { callback(); } | ||
}.bind(this) | ||
}); | ||
}, | ||
setUp: function() { | ||
if (this.data) { | ||
throw 'existing data will be overwritten by read file'; | ||
} | ||
util.readFile({ | ||
@@ -32,12 +52,3 @@ file: this.storeFile(), | ||
tearDown: function() { | ||
if (!this.dataChanged) { | ||
this.resetData(); | ||
return false; | ||
} | ||
util.writeFile({ | ||
data: this.data, | ||
file: this.storeFile(), | ||
sync: delegate.sync, | ||
onDone: this.resetData.bind(this) | ||
}); | ||
return this.persistLinks(this.resetData.bind(this)); | ||
}, | ||
@@ -50,2 +61,3 @@ | ||
dataChanged: false, | ||
isPersisting: false, | ||
lines: 0, | ||
@@ -52,0 +64,0 @@ |
@@ -17,8 +17,13 @@ var fs = require('fs'); | ||
var string; | ||
if (arguments.length > 2) { | ||
string = Array.prototype.slice.call(arguments, 1).map(toString).join(' '); | ||
if (arguments.length > 1) { | ||
if (arguments.length > 2) { | ||
string = Array.prototype.slice.call(arguments, 1).map(toString).join(' '); | ||
} else { | ||
string = toString(arguments[1]); | ||
} | ||
string = label.toUpperCase() +': '+ string; | ||
} else { | ||
string = toString(arguments[1]); | ||
string = 'LOG: '+ arguments[0]; | ||
} | ||
string = label.toUpperCase() +': '+ string; | ||
if (string.indexOf('\n') !== -1) { | ||
@@ -32,2 +37,14 @@ string = '\n\n'+ string; | ||
/**
 * Start an outgoing request with the core module that matches the target's
 * protocol. Needed because `http.request` rejects HTTPS targets with
 * `Error: Protocol "https:" not supported. Expected "http:".`
 *
 * All arguments are forwarded unchanged to the chosen module's `request`.
 *
 * @param {(string|Object)} target - First argument: a URL string, or an
 *   options object whose `protocol` property (e.g. `'https:'`) is read.
 * @returns {*} Whatever the selected module's `request` returns.
 * @throws {Error} When the protocol is neither `http:` nor `https:`.
 */
module.exports.request = function() {
  var target = arguments[0];
  var protocol;
  if (typeof target === 'string') {
    protocol = url.parse(target).protocol;
  } else {
    protocol = target && target.protocol;
  }

  // Only the two core transports are ever loaded, via static requires, so no
  // module name is derived from the input. A missing protocol (scheme-less
  // string, or options without `protocol`) falls back to plain HTTP instead
  // of crashing on a null value.
  var transport;
  if (!protocol || protocol === 'http:') {
    transport = require('http');
  } else if (protocol === 'https:') {
    transport = require('https');
  } else {
    throw new Error('Unsupported protocol: ' + protocol);
  }
  return transport.request.apply(null, arguments);
};
module.exports.normalizeLink = function(link) { | ||
@@ -42,3 +59,5 @@ var parsed = url.parse(link); | ||
createFromTokens: function(escapedTokens) { | ||
return new RegExp('\\b('+ escapedTokens.join('|') +')\\b'); | ||
return new RegExp('\\b(' + | ||
escapedTokens.join('|').replace(/\s/g, '\\s') + | ||
')\\b'); | ||
} | ||
@@ -45,0 +64,0 @@ }; |
Dynamic require
Supply chain risk: Dynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
42583
28
1188
41
1
7