Socket
Socket
Sign in · Demo · Install

@extractus/feed-extractor

Package Overview
Dependencies
4
Maintainers
2
Versions
31
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 7.0.4 to 7.0.5

src/utils/parseRdfFeed.js

4

.eslintrc.json

@@ -65,3 +65,3 @@ {

{
"max": 460,
"max": 520,
"skipBlankLines": true,

@@ -74,3 +74,3 @@ "skipComments": false

{
"max": 150,
"max": 240,
"skipBlankLines": true

@@ -77,0 +77,0 @@ }

{
"version": "7.0.4",
"version": "7.0.5",
"name": "@extractus/feed-extractor",

@@ -4,0 +4,0 @@ "description": "To read and normalize RSS/ATOM/JSON feed data",

@@ -117,2 +117,3 @@ # feed-extractor

- [RSS Feed](https://www.rssboard.org/rss-specification)
- [RDF Feed](https://web.resource.org/rss/1.0/spec)
- [ATOM Feed](https://datatracker.ietf.org/doc/html/rfc5023)

@@ -119,0 +120,0 @@ - [JSON Feed](https://www.jsonfeed.org/version/1.1/)

@@ -6,6 +6,7 @@ // main.js

import retrieve from './utils/retrieve.js'
import { validate, xml2obj, isRSS, isAtom } from './utils/xmlparser.js'
import { validate, xml2obj, isRSS, isAtom, isRdf } from './utils/xmlparser.js'
import parseJsonFeed from './utils/parseJsonFeed.js'
import parseRssFeed from './utils/parseRssFeed.js'
import parseAtomFeed from './utils/parseAtomFeed.js'
import parseRdfFeed from './utils/parseRdfFeed.js'

@@ -46,2 +47,3 @@ const getopt = (options = {}) => {

const data = xml2obj(xml, opts.xmlParserOptions)
return isRSS(data)

@@ -51,3 +53,5 @@ ? parseRssFeed(data, opts)

? parseAtomFeed(data, opts)
: null
: isRdf(data)
? parseRdfFeed(data, opts)
: null
}

@@ -54,0 +58,0 @@

@@ -141,2 +141,26 @@ // main.test

// Extracts the RDF fixture over a mocked HTTP endpoint and checks that the
// values returned by the extra-field callbacks end up on the feed and on its
// first entry.
test('extract rdf feed from Slashdot with extraFields', async () => {
  const url = 'https://some-news-page.tld/atom'
  const rdfXml = readFileSync('test-data/rdf-standard.xml', 'utf8')
  const { baseUrl, path } = parseUrl(url)

  // nock answers the GET for `path` with the fixture content
  const headers = { 'Content-Type': 'application/xml' }
  nock(baseUrl).get(path).reply(200, rdfXml, headers)

  // Callbacks that pick one Dublin Core field each from the raw data
  const getExtraFeedFields = (feed) => ({ subject: feed['dc:subject'] })
  const getExtraEntryFields = (entry) => ({ author: entry['dc:creator'] })

  const result = await extract(url, { getExtraFeedFields, getExtraEntryFields })

  expect(hasProperty(result, 'subject')).toBe(true)
  const firstEntry = result.entries[0]
  expect(hasProperty(firstEntry, 'author')).toBe(true)
  expect(validateProps(firstEntry)).toBe(true)
})
test('extract atom feed which contains multi links', async () => {

@@ -295,2 +319,18 @@ const url = 'https://some-news-page.tld/atom/multilinks'

// With `normalization: false` the result is expected to keep the source
// keys: namespaced fields on `channel` and an `item` list at the top level.
test('extract rdf feed from Slashdot without normalization', async () => {
  const url = 'https://some-news-page.tld/atom'
  const rdfXml = readFileSync('test-data/rdf-standard.xml', 'utf8')
  const { baseUrl, path } = parseUrl(url)

  // nock answers the GET for `path` with the fixture content
  const headers = { 'Content-Type': 'application/xml' }
  nock(baseUrl).get(path).reply(200, rdfXml, headers)

  const raw = await extract(url, { normalization: false })

  for (const key of ['syn:updateBase', 'dc:rights']) {
    expect(hasProperty(raw.channel, key)).toBe(true)
  }
  expect(hasProperty(raw, 'item')).toBe(true)
  expect(hasProperty(raw.item[0], 'slash:department')).toBe(true)
})
test('extract atom feed from Google', async () => {

@@ -363,3 +403,3 @@ const url = 'https://some-news-page.tld/atom'

describe('test extract with `baseUrl` option', () => {
test('extract rss feed with xml', () => {
test('extract rss feed from file', () => {
const baseUrl = 'https://huggingface.co'

@@ -382,3 +422,22 @@ const xml = readFileSync('test-data/rss-feed-miss-base-url.xml', 'utf8')

test('extract rss feed with json', () => {
// Parses the RDF fixture directly from a string with a `baseUrl` option and
// checks the expected feed/entry attributes plus the resulting links.
test('extract rdf feed from file', () => {
  const baseUrl = 'https://slashdot.org'
  const rdfXml = readFileSync('test-data/rdf-standard.xml', 'utf8')
  const result = extractFromXml(rdfXml, { baseUrl })

  feedAttrs.forEach((attr) => {
    const present = hasProperty(result, attr)
    expect(present).toBe(true)
  })
  entryAttrs.forEach((attr) => {
    const present = hasProperty(result.entries[0], attr)
    expect(present).toBe(true)
  })

  const firstEntry = result.entries[0]
  expect(validateProps(firstEntry)).toBe(true)
  expect(result.link).toBe(`${baseUrl}/`)

  const expectedPrefix = 'https://tech.slashdot.org/story/23/08/23/2238246/spacex-'
  expect(firstEntry.link.startsWith(expectedPrefix)).toBe(true)
})
test('extract json feed from file', () => {
const baseUrl = 'https://www.jsonfeed.org'

@@ -385,0 +444,0 @@ const json = readFileSync('test-data/json-feed-miss-base-url.json', 'utf8')

@@ -101,4 +101,6 @@ // parseAtomFeed.js

const feedData = data.feed
if (!normalization) {
return flatten(data.feed, baseUrl)
return flatten(feedData, baseUrl)
}

@@ -115,5 +117,5 @@

entry: item = [],
} = data.feed
} = feedData
const extraFields = getExtraFeedFields(data.feed)
const extraFields = getExtraFeedFields(feedData)

@@ -120,0 +122,0 @@ const items = isArray(item) ? item : [item]

@@ -106,4 +106,6 @@ // parseRssFeed.js

const feedData = data.rss.channel
if (!normalization) {
return flatten(data.rss.channel, baseUrl)
return flatten(feedData, baseUrl)
}

@@ -119,5 +121,5 @@

item = [],
} = data.rss.channel
} = feedData
const extraFields = getExtraFeedFields(data.rss.channel)
const extraFields = getExtraFeedFields(feedData)

@@ -124,0 +126,0 @@ const items = isArray(item) ? item : [item]

@@ -15,2 +15,6 @@ // utils / xmlparser

/**
 * Check whether parsed XML data has the shape of an RDF feed.
 *
 * The data qualifies when `hasProperty` reports an `rdf:RDF` key on it and a
 * `channel` key on the value stored under `rdf:RDF`.
 *
 * @param {Object} [data={}] - object produced by the XML parser
 * @returns {boolean} result of the `hasProperty` checks described above
 */
export const isRdf = (data = {}) => {
  const hasRoot = hasProperty(data, 'rdf:RDF')
  if (!hasRoot) {
    // Hand back the first check's own result, as `&&` would
    return hasRoot
  }
  const root = data['rdf:RDF']
  return hasProperty(root, 'channel')
}
export const validate = (xml) => {

@@ -22,5 +26,5 @@ return (!isString(xml) || !xml.length) ? false : XMLValidator.validate(xml) === true

const options = {
attributeNamePrefix: '@_',
ignoreAttributes: false,
...extraOptions,
ignoreAttributes: false,
attributeNamePrefix: '@_',
}

@@ -27,0 +31,0 @@ const parser = new XMLParser(options)

Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc