Comparing version 0.3.1 to 0.4.0
@@ -13,2 +13,9 @@ { | ||
] | ||
}, | ||
{ | ||
"name": "tired-topics", | ||
"type": "blacklist", | ||
"tokens": [ | ||
"Oculus", "Vive" | ||
] | ||
} | ||
@@ -27,6 +34,6 @@ ] | ||
"tokens": [ | ||
"C#", "\\.NET", "Angular", "BEM", "DuckDuckGo", "Java", "Kotlin", | ||
"Lua", "MatLab", "OCaml", "Perl", "R", "Raspberry\\s?Pi", "Rust", | ||
"Surface Pro", "Xamarin", "Vim", "Emacs", "Bitcoin", "LaTeX", | ||
"TypeScript", "C\\+\\+", "TPP", "Lisp", "Scheme", "Clojure", "F#" | ||
"\\.NET", "Angular", "BEM", "Clojure", "F#", "Java", "Lisp", "Lua", | ||
"MatLab", "OCaml", "Perl", "R", "Scheme", "Xamarin", "Emacs", | ||
"LaTeX", "Vim", "Azure", "DuckDuckGo", "Mesos", "Raspberry\\s?Pi", | ||
"Surface Pro", "Material Design" | ||
] | ||
@@ -39,4 +46,18 @@ }, | ||
"Trump", "Bernie", "Hillary", "San Bernardino", "Theranos", "FBI", | ||
"NSA" | ||
"NSA", "Brexit", "EU", "Meltdown", "Spectre", "Damore", "Zuckerberg" | ||
] | ||
}, | ||
{ | ||
"name": "no-comments", | ||
"input": "text-content", | ||
"type": "black-pattern", | ||
"pattern": ", 0 comments$" | ||
}, | ||
{ | ||
"name": "noisy", | ||
"type": "graylist", | ||
"tokens": [ | ||
"Amazon", "Apple", "Facebook", "Google", "Netflix", "SpaceX", | ||
"Tesla", "Bitcoin", "China", "Ethereum", "North Korea", "Russia" | ||
] | ||
} | ||
@@ -73,4 +94,6 @@ ] | ||
"tokens": [ | ||
"Jenner", "Indian?s?", "Modi", "Mallya", "Jaitley", | ||
"Quartz Daily Brief", "Quartz Weekend Brief" | ||
"Jenner", "Indian?s?", "Modi", "Mallya", "Jaitley", "Rajinikanth", | ||
"Quartz Daily Brief", "Quartz Weekend Brief", "stories you might have missed", | ||
"Bollywood", "Mumbai", "Delhi", "African?s?", "Nigerian?s?", "Goop", | ||
"Scientology" | ||
] | ||
@@ -82,5 +105,13 @@ }, | ||
"tokens": [ | ||
"Trump", "Bernie", "Sanders", "Hillary", "Clinton", "San Bernardino", | ||
"Brexit", "Sharapova", "hoverboard" | ||
"Trump", "Bernie", "Hillary", "Clinton", "San Bernardino", | ||
"Brexit", "hoverboard", "marijuana", "Jeff Sessions", "Silicon Valley" | ||
] | ||
}, | ||
{ | ||
"name": "noisy", | ||
"type": "graylist", | ||
"tokens": [ | ||
"Amazon", "Apple", "Facebook", "Google", "Netflix", "SpaceX", | ||
"Tesla" | ||
] | ||
} | ||
@@ -87,0 +118,0 @@ ] |
{ | ||
"name": "custom-rss", | ||
"version": "0.3.1", | ||
"version": "0.4.0", | ||
"description": "Filtering RSS because Zapier is too expensive.", | ||
@@ -5,0 +5,0 @@ "main": "src/feeds/index.js", |
@@ -8,3 +8,3 @@ # Custom RSS | ||
:zap: Filtering RSS because Zapier is too expensive (and Pipes is gone). | ||
:satellite: Filtering RSS because Zapier is too expensive (and Pipes is gone). | ||
@@ -24,4 +24,59 @@ ### Features | ||
The sample app is what's in the root directory. It's a barebones Connect app, with personal configuration in `config.json`. This is foremost for personal use, but what's in `/src` should be reusable with any Connect-like web framework. | ||
The sample app is what's in the root directory. It's a barebones Connect app, with personal configuration in `config.json`. This is foremost for personal use, but what's in `/src` should be reusable with any Connect-like web framework. To use it as-is: | ||
```shell | ||
$ npm run develop | ||
$ subl config.json # continued below | ||
$ subl src/feeds/feedd.js # continued below | ||
$ npm run deploy | ||
$ git push origin master | ||
$ ssh <production> | ||
$ cd <site> # or `mkdir <site>; cd <site>` | ||
$ git pull origin master # or `git clone <repo>` | ||
$ npm start # or touch tmp/restart.txt | ||
$ exit | ||
$ curl <site-url>/feedd | ||
``` | ||
```js | ||
// config.json | ||
{ | ||
"feeds": [ | ||
// ... | ||
{ | ||
"name": "feedd", | ||
"filters": [ | ||
{ "name": "tired-topics", "type": "blacklist", "tokens": [ "Foo", "Bar", "Baz" ] } | ||
] | ||
} | ||
] | ||
} | ||
// src/feeds/feedd.js | ||
var fetchFeed = require('../fetch-feed'); | ||
var filterFeed = require('../filter-feed'); | ||
var url = require('url'); | ||
// Sample feed handler: fetches the upstream feed, filters it, and writes the
// result to the HTTP response.
//   config   - this feed's entry from config.json; mutated below
//   request  - incoming request; only headers.host is read
//   response - outgoing response; receives the header and the body
module.exports = function(config, request, response) { | ||
// Where the unfiltered feed is fetched from.
config.originalURL = 'http://feedd.com/rss.xml'; | ||
// URL of this filtered feed, built from the request's host and the feed name.
config.url = url.format({ | ||
protocol: 'http', host: request.headers.host, pathname: config.name | ||
}); | ||
fetchFeed({ | ||
url: config.originalURL, | ||
onResponse: function(resFetch, data) { | ||
// Pass the upstream content type through unchanged.
response.setHeader('Content-Type', resFetch.headers['content-type']); | ||
filterFeed({ | ||
config: config, | ||
data: data, | ||
// End the response with whatever filterFeed hands back.
onDone: function(data) { response.end(data); } | ||
}); | ||
}, | ||
// On fetch failure the error message becomes the response body.
onError: function(e) { response.end(e.message); } | ||
}); | ||
}; | ||
``` | ||
### Feeds | ||
@@ -28,0 +83,0 @@ |
@@ -6,3 +6,3 @@ var fetchFeed = require('../fetch-feed'); | ||
var rScorePrefix = /^\d+\s+\S+\s+/; | ||
var rScorePrefix = /^(\[\w+\]\s)?\d+\s+\S+\s+/; | ||
@@ -21,3 +21,3 @@ function createDomainSuffix(entry) { | ||
// No score. | ||
var replaced = match.replace(rScorePrefix, ''); | ||
var replaced = match.replace(rScorePrefix, '$1'); | ||
// Add domain if any. | ||
@@ -24,0 +24,0 @@ replaced += createDomainSuffix(entry); |
@@ -10,3 +10,3 @@ var fetchFeed = require('../fetch-feed'); | ||
module.exports = function(config, request, response) { | ||
config.originalURL = 'http://qz.com/feed/'; | ||
config.originalURL = 'https://qz.com/feed/'; | ||
config.url = url.format({ | ||
@@ -13,0 +13,0 @@ protocol: 'http', host: request.headers.host, pathname: config.name |
@@ -20,6 +20,14 @@ var util = require('./util'); | ||
if (response.statusCode !== 200) { | ||
delegate.onError({ | ||
code: response.statusCode, | ||
message: response.statusMessage | ||
}); | ||
return; | ||
} | ||
var data = ''; | ||
response.setEncoding('utf8'); | ||
response.on('data', function(chunk) { | ||
data += chunk; | ||
data += chunk; | ||
}).on('end', function() { | ||
@@ -26,0 +34,0 @@ if (delegate.verbose) { util.log('data', data); } |
@@ -5,2 +5,3 @@ var createEntryLogger = require('./entry-logger'); | ||
var util = require('./util'); | ||
var patterns = util.patterns; | ||
@@ -11,5 +12,10 @@ var directory = require('path').join(__dirname, '../tmp'); | ||
return configs.map(function(config) { | ||
var filter = { name: config.name }; | ||
if (config.type === 'blacklist') { | ||
filter.pattern = util.patterns.createFromTokens(config.tokens); | ||
var filter = { name: config.name, type: config.type, input: config.input || 'title' }; | ||
switch (config.type) { | ||
case 'blacklist': | ||
case 'graylist': | ||
filter.pattern = patterns.createFromTokens(config.tokens); break; | ||
case 'black-pattern': | ||
filter.pattern = new RegExp(config.pattern); break; | ||
default: throw 'unsupported filter type'; | ||
} | ||
@@ -22,2 +28,4 @@ return filter; | ||
entry: function(root) { return root.find('entry'); }, | ||
content: function(entry) { return entry.find('content'); }, | ||
link: function(entry) { return entry.find('link'); }, | ||
@@ -28,5 +36,32 @@ title: function(entry) { return entry.find('title'); } | ||
function defaultShouldSkipEntry(entry, finders, filters, repostGuard) { | ||
var title = finders.title(entry); | ||
var text, title; | ||
var skip = filters.reduce(function(skip, filter) { | ||
return skip || filter.pattern.test(title); | ||
var input, matches; | ||
if (skip) { | ||
return skip; | ||
} | ||
switch (filter.input) { | ||
case 'text-content': | ||
if (!text) { text = patterns.stripTags(finders.content(entry)); } | ||
input = text; | ||
break; | ||
case 'title': | ||
if (!title) { title = finders.title(entry); } | ||
input = title; | ||
break; | ||
default: throw 'unsupported input type'; | ||
} | ||
matches = filter.pattern.test(input); | ||
if (matches && filter.type === 'graylist') { | ||
switch (filter.input) { | ||
case 'title': | ||
entry.transformContent('title', { to: function(match) { | ||
return '['+ filter.name +'] '+ title; | ||
}}); | ||
break; | ||
default: throw 'unsupported input type'; | ||
} | ||
return false; | ||
} | ||
return matches; | ||
}, false); | ||
@@ -44,2 +79,14 @@ if (skip) { skip = 'blocked'; } | ||
// Builds the map of finder functions for a feed.
// entry, content, link and title each use the matching delegate.find*
// override when it is set, and fall back to defaultFinders otherwise.
// id has no default: it is delegate.findId as given, so it may be undefined.
function mergeFinders(delegate) { | ||
return { | ||
entry: delegate.findEntry || defaultFinders.entry, | ||
content: delegate.findContent || defaultFinders.content, | ||
id: delegate.findId, | ||
link: delegate.findLink || defaultFinders.link, | ||
title: delegate.findTitle || defaultFinders.title | ||
}; | ||
} | ||
function main(delegate) { | ||
@@ -52,3 +99,3 @@ // delegate.config.filters: an array of filter objects for createFilters | ||
// delegate.logger: a logger whose 'logEntry' takes a dictionary | ||
// delegate.shouldSkipEntry: | ||
// delegate.shouldSkipEntry: | ||
// delegate.transform(Entry|Meta): optional transform functions that mutate given xml-transformer's 'string' | ||
@@ -65,9 +112,4 @@ | ||
var finders = { | ||
entry: delegate.findEntry || defaultFinders.entry, | ||
id: delegate.findId, | ||
link: delegate.findLink || defaultFinders.link, | ||
title: delegate.findTitle || defaultFinders.title | ||
}; | ||
var guard; | ||
var finders = mergeFinders(delegate); | ||
var guard; | ||
if (delegate.guardReposts !== false) { | ||
@@ -74,0 +116,0 @@ guard = createRepostGuard.shared; |
@@ -27,3 +27,3 @@ var fs = require('fs'); | ||
} else { | ||
string = 'LOG: '+ arguments[0]; | ||
string = 'LOG: '+ arguments[0]; | ||
} | ||
@@ -42,4 +42,7 @@ | ||
module.exports.patterns = { | ||
brackets: { open: /</g, close: />/g }, | ||
domain: /:\/\/(?:www\.)?([^\/]+)/, | ||
line: /\n/g, | ||
tag: /(<([^>]+)>)/g, | ||
createFromTokens: function(escapedTokens) { | ||
@@ -49,2 +52,11 @@ return new RegExp('\\b(' + | ||
')\\b'); | ||
}, | ||
// Returns the string with every match of this.brackets.open replaced by '<'
// and every match of this.brackets.close replaced by '>'.
// Reads the patterns through `this`, so it must be called as a method of the
// patterns object.
decodeTags: function(string) { | ||
return string.replace(this.brackets.open, '<') | ||
.replace(this.brackets.close, '>'); | ||
}, | ||
// Runs decodeTags on the string, then removes every match of this.tag,
// returning only the text that was outside the tags.
stripTags: function(string) { | ||
return this.decodeTags(string).replace(this.tag, ''); | ||
} | ||
@@ -51,0 +63,0 @@ }; |
@@ -10,3 +10,4 @@ var assert = require('assert'); | ||
{ name: 'foo', type: 'blacklist', tokens: ['Foo'] }, | ||
{ name: 'bar', type: 'blacklist', tokens: ['Bar'] } | ||
{ name: 'bar', type: 'blacklist', tokens: ['Bar'] }, | ||
{ name: 'baz', type: 'graylist', tokens: ['Baz'] } | ||
]; | ||
@@ -16,3 +17,3 @@ | ||
var createFilters = filterFeed.createFilters; | ||
runner.subject('createFilters'); | ||
runner.subject('.createFilters'); | ||
@@ -33,15 +34,15 @@ test('creates filters from configs', function() { | ||
var filters = createFilters(fixtureFiltersConfig); | ||
var finders = { | ||
link: function(entry) { return entry.link; }, | ||
title: function(entry) { return entry.title; } | ||
var mockFinders = { | ||
link: function(mockEntry) { return mockEntry.link; }, | ||
title: function(mockEntry) { return mockEntry.title; } | ||
}; | ||
var repostGuard = { | ||
var mockRepostGuard = { | ||
checkLink: function(link) { return link !== 'repost.com'; } | ||
}; | ||
var defaultShouldSkipEntry = filterFeed.defaultShouldSkipEntry; | ||
runner.subject('defaultShouldSkipEntry'); | ||
runner.subject('.defaultShouldSkipEntry'); | ||
test("returns 'blocked' reason for filtered posts", function() { | ||
var entryToBlock = { title: 'Some title with Foo', link: 'foo.com' }; | ||
var skip = defaultShouldSkipEntry(entryToBlock, finders, filters, repostGuard); | ||
var skip = defaultShouldSkipEntry(entryToBlock, mockFinders, filters, mockRepostGuard); | ||
assert.equal(skip, 'blocked'); | ||
@@ -52,3 +53,3 @@ }); | ||
var entryToDedupe = { title: 'Some title', link: 'repost.com' }; | ||
var skip = defaultShouldSkipEntry(entryToDedupe, finders, filters, repostGuard); | ||
var skip = defaultShouldSkipEntry(entryToDedupe, mockFinders, filters, mockRepostGuard); | ||
assert.equal(skip, 'repost'); | ||
@@ -59,7 +60,19 @@ }); | ||
var entryToKeep = { title: 'Some title', link: 'foo.com' }; | ||
var skip = defaultShouldSkipEntry(entryToKeep, finders, filters, repostGuard); | ||
var skip = defaultShouldSkipEntry(entryToKeep, mockFinders, filters, mockRepostGuard); | ||
assert(!skip); | ||
}); | ||
// A graylist match ('Baz' in the fixture config) must not skip the entry;
// the title is instead prefixed with the filter name in square brackets.
test('also returns false for questionable posts, tags title ', function() { | ||
var entryToKeep = { title: 'Some title with Baz', link: 'baz.com', | ||
// Stub for the entry's transformContent: checks which tag is targeted and
// stores the result of the `to` callback as the new title.
transformContent: function(tagName, args) { | ||
assert.equal(tagName, 'title'); | ||
this.title = args.to(); | ||
} | ||
}; | ||
var skip = defaultShouldSkipEntry(entryToKeep, mockFinders, filters, mockRepostGuard); | ||
assert(!skip); | ||
// The tag has to sit at the very start of the title.
assert.equal(entryToKeep.title.indexOf('[baz] '), 0); | ||
}); | ||
runner.report(); |
@@ -9,4 +9,12 @@ var assert = require('assert'); | ||
runner.subject('normalizeLink'); | ||
runner.subject('.stripTags'); | ||
test('decodes then strips tags', function() { | ||
var html = '<p>1 point, <a href="https://news.ycombinator.com/item?id=12024279">0 comments</a></p>'; | ||
assert.equal(util.patterns.stripTags(html), '1 point, 0 comments'); | ||
}); | ||
runner.subject('.normalizeLink'); | ||
test('returns only host and pathname', function() { | ||
@@ -22,3 +30,3 @@ var link = 'http://some-domain.com/some-path?some-query#some-fragment'; | ||
runner.subject('callOn'); | ||
runner.subject('.callOn'); | ||
@@ -41,3 +49,3 @@ test('returns function that calls original on nth call', function() { | ||
runner.subject('nthIndexOf'); | ||
runner.subject('.nthIndexOf'); | ||
@@ -54,3 +62,3 @@ test('returns nth index of search value', function() { | ||
runner.subject('nthLastIndexOf'); | ||
runner.subject('.nthLastIndexOf'); | ||
@@ -57,0 +65,0 @@ test('returns nth last index of search value', function() { |
54451
1458
96