@extractus/feed-extractor
Advanced tools
Comparing version 7.0.4 to 7.0.5
@@ -65,3 +65,3 @@ { | ||
{ | ||
"max": 460, | ||
"max": 520, | ||
"skipBlankLines": true, | ||
@@ -74,3 +74,3 @@ "skipComments": false | ||
{ | ||
"max": 150, | ||
"max": 240, | ||
"skipBlankLines": true | ||
@@ -77,0 +77,0 @@ } |
{ | ||
"version": "7.0.4", | ||
"version": "7.0.5", | ||
"name": "@extractus/feed-extractor", | ||
@@ -4,0 +4,0 @@ "description": "To read and normalize RSS/ATOM/JSON feed data", |
@@ -117,2 +117,3 @@ # feed-extractor | ||
- [RSS Feed](https://www.rssboard.org/rss-specification) | ||
- [RDF Feed](https://web.resource.org/rss/1.0/spec) | ||
- [ATOM Feed](https://datatracker.ietf.org/doc/html/rfc4287) | ||
@@ -119,0 +120,0 @@ - [JSON Feed](https://www.jsonfeed.org/version/1.1/) |
@@ -6,6 +6,7 @@ // main.js | ||
import retrieve from './utils/retrieve.js' | ||
import { validate, xml2obj, isRSS, isAtom } from './utils/xmlparser.js' | ||
import { validate, xml2obj, isRSS, isAtom, isRdf } from './utils/xmlparser.js' | ||
import parseJsonFeed from './utils/parseJsonFeed.js' | ||
import parseRssFeed from './utils/parseRssFeed.js' | ||
import parseAtomFeed from './utils/parseAtomFeed.js' | ||
import parseRdfFeed from './utils/parseRdfFeed.js' | ||
@@ -46,2 +47,3 @@ const getopt = (options = {}) => { | ||
const data = xml2obj(xml, opts.xmlParserOptions) | ||
return isRSS(data) | ||
@@ -51,3 +53,5 @@ ? parseRssFeed(data, opts) | ||
? parseAtomFeed(data, opts) | ||
: null | ||
: isRdf(data) | ||
? parseRdfFeed(data, opts) | ||
: null | ||
} | ||
@@ -54,0 +58,0 @@ |
@@ -141,2 +141,26 @@ // main.test | ||
test('extract rdf feed from Slashdot with extraFields', async () => { | ||
const url = 'https://some-news-page.tld/atom' | ||
const xml = readFileSync('test-data/rdf-standard.xml', 'utf8') | ||
const { baseUrl, path } = parseUrl(url) | ||
nock(baseUrl).get(path).reply(200, xml, { | ||
'Content-Type': 'application/xml', | ||
}) | ||
const result = await extract(url, { | ||
getExtraFeedFields: data => { | ||
return { | ||
subject: data['dc:subject'], | ||
} | ||
}, | ||
getExtraEntryFields: data => { | ||
return { | ||
author: data['dc:creator'], | ||
} | ||
}, | ||
}) | ||
expect(hasProperty(result, 'subject')).toBe(true) | ||
expect(hasProperty(result.entries[0], 'author')).toBe(true) | ||
expect(validateProps(result.entries[0])).toBe(true) | ||
}) | ||
test('extract atom feed which contains multi links', async () => { | ||
@@ -295,2 +319,18 @@ const url = 'https://some-news-page.tld/atom/multilinks' | ||
test('extract rdf feed from Slashdot without normalization', async () => { | ||
const url = 'https://some-news-page.tld/atom' | ||
const xml = readFileSync('test-data/rdf-standard.xml', 'utf8') | ||
const { baseUrl, path } = parseUrl(url) | ||
nock(baseUrl).get(path).reply(200, xml, { | ||
'Content-Type': 'application/xml', | ||
}) | ||
const result = await extract(url, { | ||
normalization: false, | ||
}) | ||
expect(hasProperty(result.channel, 'syn:updateBase')).toBe(true) | ||
expect(hasProperty(result.channel, 'dc:rights')).toBe(true) | ||
expect(hasProperty(result, 'item')).toBe(true) | ||
expect(hasProperty(result.item[0], 'slash:department')).toBe(true) | ||
}) | ||
test('extract atom feed from Google', async () => { | ||
@@ -363,3 +403,3 @@ const url = 'https://some-news-page.tld/atom' | ||
describe('test extract with `baseUrl` option', () => { | ||
test('extract rss feed with xml', () => { | ||
test('extract rss feed from file', () => { | ||
const baseUrl = 'https://huggingface.co' | ||
@@ -382,3 +422,22 @@ const xml = readFileSync('test-data/rss-feed-miss-base-url.xml', 'utf8') | ||
test('extract rss feed with json', () => { | ||
test('extract rdf feed from file', () => { | ||
const baseUrl = 'https://slashdot.org' | ||
const xml = readFileSync('test-data/rdf-standard.xml', 'utf8') | ||
const result = extractFromXml(xml, { baseUrl }) | ||
feedAttrs.forEach((k) => { | ||
expect(hasProperty(result, k)).toBe(true) | ||
}) | ||
entryAttrs.forEach((k) => { | ||
expect(hasProperty(result.entries[0], k)).toBe(true) | ||
}) | ||
expect(validateProps(result.entries[0])).toBe(true) | ||
expect(result.link).toBe(baseUrl + '/') | ||
const firstItemLink = result.entries[0].link | ||
expect(firstItemLink.startsWith('https://tech.slashdot.org/story/23/08/23/2238246/spacex-')).toBe(true) | ||
}) | ||
test('extract json feed from file', () => { | ||
const baseUrl = 'https://www.jsonfeed.org' | ||
@@ -385,0 +444,0 @@ const json = readFileSync('test-data/json-feed-miss-base-url.json', 'utf8') |
@@ -101,4 +101,6 @@ // parseAtomFeed.js | ||
const feedData = data.feed | ||
if (!normalization) { | ||
return flatten(data.feed, baseUrl) | ||
return flatten(feedData, baseUrl) | ||
} | ||
@@ -115,5 +117,5 @@ | ||
entry: item = [], | ||
} = data.feed | ||
} = feedData | ||
const extraFields = getExtraFeedFields(data.feed) | ||
const extraFields = getExtraFeedFields(feedData) | ||
@@ -120,0 +122,0 @@ const items = isArray(item) ? item : [item] |
@@ -106,4 +106,6 @@ // parseRssFeed.js | ||
const feedData = data.rss.channel | ||
if (!normalization) { | ||
return flatten(data.rss.channel, baseUrl) | ||
return flatten(feedData, baseUrl) | ||
} | ||
@@ -119,5 +121,5 @@ | ||
item = [], | ||
} = data.rss.channel | ||
} = feedData | ||
const extraFields = getExtraFeedFields(data.rss.channel) | ||
const extraFields = getExtraFeedFields(feedData) | ||
@@ -124,0 +126,0 @@ const items = isArray(item) ? item : [item] |
@@ -15,2 +15,6 @@ // utils / xmlparser | ||
export const isRdf = (data = {}) => { | ||
return hasProperty(data, 'rdf:RDF') && hasProperty(data['rdf:RDF'], 'channel') | ||
} | ||
export const validate = (xml) => { | ||
@@ -22,5 +26,5 @@ return (!isString(xml) || !xml.length) ? false : XMLValidator.validate(xml) === true | ||
const options = { | ||
attributeNamePrefix: '@_', | ||
ignoreAttributes: false, | ||
...extraOptions, | ||
ignoreAttributes: false, | ||
attributeNamePrefix: '@_', | ||
} | ||
@@ -27,0 +31,0 @@ const parser = new XMLParser(options) |
64966
26
1701
381