Comparing version 1.1.0 to 2.0.0
106
index.js
var fs = require('fs'); | ||
var _ = require('lodash'); | ||
var Q = require('q'); | ||
var cheerio = require('cheerio'); | ||
@@ -9,13 +8,8 @@ | ||
// Main cheerio object | ||
// Main DOM object | ||
var $; | ||
function Brightml(filename) { | ||
this.filename = filename; | ||
}; | ||
// Replace illegal markdown nested tables by a warning | ||
Brightml.prototype._removeNestedTables = function() { | ||
console.log('Removing nested tables from HTML...'); | ||
var message = '<b>===== Illegal nested table =====</b>'; | ||
function removeNestedTables() { | ||
var message = '<b>Illegal nested table :</b>'; | ||
// Check for nested tables | ||
@@ -35,4 +29,3 @@ $('table').has('table').each(function() { | ||
// The rest is in <tbody> in <td> elements | ||
Brightml.prototype._formatTables = function() { | ||
console.log('Properly formatting tables...'); | ||
function formatTables() { | ||
$('table').each(function() { | ||
@@ -47,13 +40,13 @@ $table = $(this); | ||
// First child might be a <caption> | ||
if (firstChildType === 'caption') { | ||
// Move it before the <table> tag | ||
$caption = $table.find('caption'); | ||
$caption.insertBefore($table); | ||
// Get actual remaining children to process | ||
$children = $table.children(); | ||
firstChildType = getTagName($children); | ||
} | ||
switch (firstChildType) { | ||
// First child might be a <caption> | ||
case 'caption': | ||
console.log('Moving caption...'); | ||
// Move it before the <table> tag | ||
$caption = $table.find('caption'); | ||
$caption.insertBefore($table); | ||
// Get actual remaining children to process | ||
$children = $table.children(); | ||
firstChildType = getTagName($children); | ||
// Case <tr>, move the first in a new <thead> and the others in a new <tbody> | ||
@@ -99,4 +92,3 @@ case 'tr': | ||
// Remove <p> from <th>/<td> table cells | ||
Brightml.prototype._cleanTableCells = function() { | ||
console.log('Cleaning up tables cells...'); | ||
function cleanTableCells() { | ||
$('td, th').has('p').each(function() { | ||
@@ -112,3 +104,3 @@ // Get paragraph content | ||
// Clean up attributes | ||
Brightml.prototype._cleanElements = function() { | ||
function cleanElements() { | ||
// Iterate over elements | ||
@@ -151,3 +143,3 @@ $('*').each(function() { | ||
// For empty <a> tags with an id attribute, set id on parent | ||
Brightml.prototype._setAnchorIds = function() { | ||
function setAnchorsId() { | ||
$('a').each(function() { | ||
@@ -166,3 +158,3 @@ var attributes = getTagAttributes($(this)); | ||
// Move local referenced tags before next <h1> | ||
Brightml.prototype._moveLocalReferences = function() { | ||
function moveLocalReferences() { | ||
$('a').each(function() { | ||
@@ -231,43 +223,37 @@ // Check if href is an id link | ||
function parse(html) { | ||
$ = cheerio.load(html); | ||
} | ||
function render() { | ||
return $.html(); | ||
} | ||
// Process an HTML string | ||
Brightml.prototype.process = function(html) { | ||
function clean(html) { | ||
// Convert to DOM using cheerio | ||
console.log('Parsing HTML...'); | ||
$ = cheerio.load(html); | ||
parse(html); | ||
// Cleanup elements | ||
console.log('Cleaning up...'); | ||
this._cleanElements(); | ||
this._setAnchorIds(); | ||
this._moveLocalReferences(); | ||
setAnchorsId(); | ||
moveLocalReferences(); | ||
cleanElements(); | ||
// Cleanup tables | ||
this._removeNestedTables(); | ||
this._formatTables(); | ||
this._cleanTableCells(); | ||
removeNestedTables(); | ||
formatTables(); | ||
cleanTableCells(); | ||
console.log('Done.'); | ||
return Q($.html()); | ||
return render(); | ||
}; | ||
// Process an HTML file | ||
Brightml.prototype.render = function() { | ||
var d = Q.defer(); | ||
var that = this; | ||
// Read file and convert to DOM using cheerio | ||
console.log('Reading HTML file: '+that.filename); | ||
Q.nfcall(fs.readFile, that.filename) | ||
.then(function(data) { | ||
return d.resolve(that.process(data)); | ||
}) | ||
.fail(function(err) { | ||
console.log('Error reading HTML file.'); | ||
console.log(err.stack); | ||
return d.reject(err); | ||
}); | ||
return d.promise; | ||
}; | ||
module.exports = Brightml; | ||
module.exports = { | ||
clean: clean, | ||
parse: parse, | ||
render: render, | ||
cleanElements: cleanElements, | ||
setAnchorsId: setAnchorsId, | ||
moveLocalReferences: moveLocalReferences, | ||
removeNestedTables: removeNestedTables, | ||
formatTables: formatTables, | ||
cleanTableCells: cleanTableCells | ||
}; |
{ | ||
"name": "brightml", | ||
"version": "1.1.0", | ||
"description": "Smart utility rendering markdown-ready HTML", | ||
"main": "index.js", | ||
"author": "GitBook Team <contact@gitbook.com>", | ||
"license": "Apache-2.0", | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/GitbookIO/brightml.git" | ||
}, | ||
"bugs": { | ||
"url": "https://github.com/GitbookIO/brightml/issues" | ||
}, | ||
"contributors": [ | ||
{ | ||
"name": "Johan Preynat", | ||
"email": "johan@gitbook.com" | ||
} | ||
], | ||
"dependencies": { | ||
"cheerio": "git://github.com/cheeriojs/cheerio.git#70c5608113d3efaf584efd29edafe173b74e106f", | ||
"lodash": "^3.10.1", | ||
"q": "^1.4.1" | ||
}, | ||
"scripts": { | ||
"test": "echo \"Error: no test specified\" && exit 1" | ||
"name": "brightml", | ||
"version": "2.0.0", | ||
"description": "Smart utility rendering markdown-ready HTML", | ||
"main": "index.js", | ||
"author": "GitBook Team <contact@gitbook.com>", | ||
"license": "Apache-2.0", | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/GitbookIO/brightml.git" | ||
}, | ||
"bugs": { | ||
"url": "https://github.com/GitbookIO/brightml/issues" | ||
}, | ||
"contributors": [ | ||
{ | ||
"name": "Johan Preynat", | ||
"email": "johan@gitbook.com" | ||
} | ||
], | ||
"dependencies": { | ||
"cheerio": "git://github.com/cheeriojs/cheerio.git#70c5608113d3efaf584efd29edafe173b74e106f", | ||
"lodash": "^3.10.1" | ||
}, | ||
"scripts": { | ||
"test": "node_modules/.bin/mocha -b --reporter spec --timeout 5000" | ||
}, | ||
"devDependencies": { | ||
"mocha": "^2.3.4", | ||
"should": "^8.0.2" | ||
} | ||
} |
108
README.md
@@ -13,50 +13,72 @@ # Brightml | ||
Brightml is promise-based. | ||
Clean all HTML at once : | ||
It can be used to render from a file: | ||
```JavaScript | ||
var Brightml = require('brightml'); | ||
var converter = new Brightml('index.html'); | ||
var brightml = require('brightml'); | ||
converter.render() | ||
.then(function (cleanHTML) { | ||
// Use your cleanHTML as intended | ||
}); | ||
var HTMLString = '<table><tr><td>Title 1</td><td>Title 2</td></tr><tr><td>Data 1</td><td>Data 2</td></tr></table>'; | ||
var cleanHTML = brightml.clean(HTMLString); | ||
// cleanHTML is : | ||
// <table> | ||
// <thead> | ||
// <tr> | ||
// <th>Title 1</th> | ||
// <th>Title 2</th> | ||
// </tr> | ||
// </thead> | ||
// <tbody> | ||
// <tr> | ||
// <td>Data 1</td> | ||
// <td>Data 2</td> | ||
// </tr> | ||
// </tbody> | ||
// </table> | ||
``` | ||
Or process an HTML string directly: | ||
Or use the module's functions as required : | ||
```JavaScript | ||
var Brightml = require('brightml'); | ||
var converter = new Brightml(); | ||
var brightml = require('brightml'); | ||
var HTMLString = '<table><tr><td>Title 1</td><td>Title 2</td></tr><tr><td>Data 1</td><td>Data 2</td></tr></table>'; | ||
converter.process(HTMLString) | ||
.then(function (cleanHTML) { | ||
// cleanHTML is now | ||
// <table> | ||
// <thead> | ||
// <tr> | ||
// <th>Title 1</th> | ||
// <th>Title 2</th> | ||
// </tr> | ||
// </thead> | ||
// <tbody> | ||
// <tr> | ||
// <td>Data 1</td> | ||
// <td>Data 2</td> | ||
// </tr> | ||
// </tbody> | ||
// </table> | ||
}); | ||
brightml.parse(HTMLString); | ||
brightml.formatTables(); | ||
var cleanHTML = brightml.render(); | ||
// cleanHTML is : | ||
// <table> | ||
// <thead> | ||
// <tr> | ||
// <th>Title 1</th> | ||
// <th>Title 2</th> | ||
// </tr> | ||
// </thead> | ||
// <tbody> | ||
// <tr> | ||
// <td>Data 1</td> | ||
// <td>Data 2</td> | ||
// </tr> | ||
// </tbody> | ||
// </table> | ||
``` | ||
## What gets done | ||
## What it does | ||
Brightml performs the following in order. | ||
Using `brightml.clean(html)` performs the following operations in order. | ||
#### Clean elements | ||
#### brightml.parse(HTMLString) | ||
Convert HTML to DOM using [cheerio](https://github.com/cheeriojs/cheerio). | ||
#### brightml.setAnchorsId() | ||
Try to set each `<a>` tag's `id` attribute on its direct parent if possible. | ||
#### brightml.moveLocalReferences() | ||
For each `<a>` tag whose `href` attribute is a local link, move the referenced HTML element before the next `<h1>` tag. This feature prevents local links from breaking and keeps them in sight. | ||
#### brightml.cleanElements() | ||
* Remove empty tags. | ||
@@ -69,15 +91,7 @@ * Remove forbidden HTML tags and place their HTML content in a `<p>` instead. | ||
#### Set anchors `id` | ||
#### brightml.removeNestedTables() | ||
Try to set each `<a>` tag's `id` attribute on its direct parent if possible. | ||
#### Move local references | ||
For each `<a>` tag whose `href` attribute is a local link, move the referenced HTML element before the next `<h1>` tag. This feature prevents local links from breaking and keeps them in sight. | ||
#### Remove nested `<table>` tags | ||
Replace nested `<table>` tags with a warning message followed by their content in a simple `<td>` tag. | ||
#### Evenly format `<table>` elements | ||
#### brightml.formatTables() | ||
@@ -114,4 +128,8 @@ Ensure every `<table>` elements look the same. | ||
#### Clean table cells | ||
#### brightml.cleanTableCells() | ||
Ensure that no `<th>` or `<td>` tag contains a `<p>` tag, to prevent line breaks. | ||
Ensure that no `<th>` or `<td>` tag contains a `<p>` tag, to prevent line breaks. | ||
#### brightml.render() | ||
Returns the current state of `HTMLString` passed to `brightml.parse(HTMLString)`. |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No tests
Quality: Package does not have any tests. This is a strong signal of a poorly maintained or low-quality package.
Found 1 instance in 1 package
26721
2
6
639
1
132
2