tm-content-parser
Advanced tools
Comparing version 1.0.13 to 1.0.15
25
index.js
@@ -15,3 +15,4 @@ "use strict"; | ||
// Returns the sentinel string 'IGNORE' when `content.is('script')` is truthy, otherwise null.
// NOTE(review): `content` is presumably a cheerio-wrapped element - confirm against the caller.
const parseScript = content => content.is('script') ? 'IGNORE' : null; | ||
// Passes `markup` to parseParagraph when `ele` matches ul/ol/table/h1/h2/h3/iframe; otherwise returns null.
// NOTE(review): a second `parseBasicHTML` declaration without `table` in the selector follows in this view -
// presumably this row is the pre-change side of the comparison; confirm.
const parseBasicHTML = (ele, markup) => ele.is("ul,ol,table,h1,h2,h3,iframe") ? parseParagraph(markup) : null; | ||
// Passes `markup` to parseTable when `ele` matches "table"; otherwise returns null so that a
// caller chaining parsers with `||` can fall through to the next one.
const parseHTMLTable = (ele, markup) => ele.is("table") ? parseTable(markup) : null; | ||
// Passes `markup` to parseParagraph when `ele` matches ul/ol/h1/h2/h3/iframe; otherwise returns null.
// `table` is not in this selector; table elements are matched by parseHTMLTable instead.
const parseBasicHTML = (ele, markup) => ele.is("ul,ol,h1,h2,h3,iframe") ? parseParagraph(markup) : null; | ||
const parseEscenicImage = (ele, content) => { | ||
@@ -162,2 +163,3 @@ if (ele.is('p')) { | ||
return parseScript(ele) || | ||
parseHTMLTable(ele, markup) || | ||
parseBasicHTML(ele, markup)|| | ||
@@ -203,3 +205,2 @@ parseEscenicImage(ele, content) || | ||
replaceEntityTags, | ||
removeNewlinesFromLists, | ||
replaceNewLineCharacters, | ||
@@ -211,2 +212,20 @@ formatHtml | ||
/**
 * Converts table markup into a "static" content item whose body is a JSON-encoded
 * array of rows, each row being an array of cell texts.
 *
 * Only <td> cells are read, so a row made up solely of <th> cells produces an empty
 * array and is dropped by the filter below.
 *
 * @param {string} text - Markup to parse; every <tr> found in it is read.
 * @returns {{type: string, static_content: {type: string, body: string}}}
 *   `type` is 'static'; `static_content.type` is 'table'; `static_content.body` is the
 *   JSON string of the remaining rows.
 */
const parseTable = text => { | ||
// decodeEntities is disabled when loading the markup.
const $ = cheerio.load(text, {decodeEntities: false}); | ||
// One inner array per <tr>, holding the text of each <td> found under that row.
const tableToArray = $("tr").toArray().map( (tr) => { | ||
const tds = $(tr).find("td"); | ||
return tds.map((idx, td) => $(td).text()).toArray(); | ||
}); | ||
const filtered = tableToArray.filter(arr => arr.length); // remove empty inner arrays [] | ||
// The rows are serialized to a string rather than embedded as a nested array.
const static_content = { | ||
type: 'table', | ||
body: JSON.stringify(filtered) | ||
}; | ||
return { | ||
type: 'static', | ||
static_content | ||
}; | ||
}; | ||
const removeEmptyHtmlTags = markup => { | ||
@@ -231,4 +250,2 @@ const processedMarkup = R.replace(/<(\w*)><\/\1>|<(\w*)>\s<\/\2>/g, '', markup); | ||
// R.replace applied to a pattern and replacement only; the result is called with the markup string.
// Removes every run of one or more non-'<' characters found between `/li>` and the next `<li>`,
// so any text there (not just newlines) is dropped and the list items become adjacent.
// NOTE(review): `R` is presumably Ramda - confirm against the file's imports.
const removeNewlinesFromLists = R.replace(/\/li>([^<]+)<li>/g, '\/li><li>'); | ||
// R.replace applied to a pattern and replacement only: replaces every "\n" with '<br>'.
const replaceNewLineCharacters = R.replace(/\n/g, '<br>'); | ||
@@ -235,0 +252,0 @@ |
{ | ||
"name": "tm-content-parser", | ||
"version": "1.0.13", | ||
"version": "1.0.15", | ||
"description": "Trinity Mirror Content Type Parser", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -107,16 +107,2 @@ "use strict"; | ||
// Spec for removeNewlinesFromLists, fetched from the module under test via `__get__`.
// NOTE(review): `__get__` suggests the module is loaded through a rewire-style wrapper - confirm.
describe('.removeNewlinesFromLists', () => { | ||
const removeNewlinesFromLists = contentTypeParser.__get__("removeNewlinesFromLists"); | ||
// The expected string keeps the "\n" characters in the paragraph text; only the "\n"
// between </li> and the following <li> is removed.
it('should replace new line characters with correct html tags', () => { | ||
removeNewlinesFromLists( | ||
'<p>This is a paragraph with \n new \n lines \n <ul><li>Coffee</li>\n<li>Tea</li>\n<li>Milk</li></ul></p>' | ||
).should.eql( | ||
'<p>This is a paragraph with \n new \n lines \n <ul><li>Coffee</li><li>Tea</li><li>Milk</li></ul></p>' | ||
); | ||
}); | ||
}); | ||
describe('.replaceNewLineCharacters', () => { | ||
@@ -157,2 +143,31 @@ | ||
// Spec for parseTable: feeds it a three-row table (one <th> header row, two <td> data rows)
// and checks the shape and JSON body of the returned static content item.
describe('test table parser', () => { | ||
const table = ` | ||
<table> | ||
<tr> | ||
<th>Company</th> | ||
<th>Contact</th> | ||
<th>Country</th> | ||
</tr> | ||
<tr> | ||
<td>Alfreds Futterkiste</td> | ||
<td>Maria Anders</td> | ||
<td>Germany</td> | ||
</tr> | ||
<tr> | ||
<td>Centro comercial Moctezuma</td> | ||
<td>Francisco Chang</td> | ||
<td>Mexico</td> | ||
</tr> | ||
</table>`; | ||
// parseTable is fetched from the module under test via `__get__`.
const parseTable = contentTypeParser.__get__("parseTable"); | ||
it('should correctly parse table', () => { | ||
const parsed = parseTable(table); | ||
parsed.type.should.eql("static"); | ||
parsed.static_content.type.should.eql("table"); | ||
// Only the two <td> rows are expected; the <th> header row does not appear in the body.
parsed.static_content.body.should.eql(`[["Alfreds Futterkiste","Maria Anders","Germany"],["Centro comercial Moctezuma","Francisco Chang","Mexico"]]`); | ||
}); | ||
}); | ||
describe('idAfterContent', () => { | ||
@@ -159,0 +174,0 @@ const idAfterContent = contentTypeParser.__get__("idAfterContent"); |
70889
12
1411