tabletojson
Advanced tools
Comparing version 1.1.1 to 1.2.0
@@ -75,5 +75,4 @@ 'use strict'; | ||
trs.each(function(i, row) { | ||
$(row) | ||
.find('th') | ||
.each(function(j, cell) { | ||
const cells = options.useFirstRowForHeadings ? $(row).find('td, th') : $(row).find('th'); | ||
cells.each(function(j, cell) { | ||
if (options.onlyColumns && !options.onlyColumns.includes(j)) return; | ||
@@ -107,3 +106,2 @@ if (options.ignoreColumns && !options.onlyColumns && options.ignoreColumns.includes(j)) return; | ||
let rowspans = []; | ||
let nextRowspans = []; | ||
@@ -124,2 +122,12 @@ // Fetch each row | ||
// Add content from rowspans | ||
rowspans.forEach((rowspan, index) => { | ||
if (!rowspan) return; | ||
setColumn(index, rowspan.content); | ||
rowspan.value--; | ||
}); | ||
const nextrowspans = [...rowspans]; | ||
const cells = options.useFirstRowForHeadings ? $(row).find('td, th') : $(row).find('td'); | ||
@@ -137,3 +145,11 @@ cells.each(function(j, cell) { | ||
// Apply rowspans offsets | ||
j += rowspans.filter(Boolean).length; | ||
let aux = j; | ||
j = 0; | ||
do { | ||
while (rowspans[j]) j++; | ||
while (aux && !rowspans[j]) { | ||
j++; | ||
aux--; | ||
} | ||
} while (aux); | ||
@@ -155,17 +171,10 @@ if (options.onlyColumns && !options.onlyColumns.includes(j)) return; | ||
const value = $(cell).attr('rowspan') - 1; | ||
if (value > 0) nextRowspans[j] = {content, value}; | ||
if (value > 0) nextrowspans[j] = {content, value}; | ||
}); | ||
// Add content from rowspans | ||
rowspans = nextrowspans; | ||
rowspans.forEach((rowspan, index) => { | ||
setColumn(index, rowspan.content); | ||
rowspan.value--; | ||
if (rowspan.value) nextRowspans[index] = rowspan; | ||
if (rowspan && rowspan.value === 0) rowspans[index] = null; | ||
}); | ||
rowspans = nextRowspans; | ||
nextRowspans = []; | ||
// Skip blank rows | ||
@@ -172,0 +181,0 @@ if (JSON.stringify(rowAsJson) !== '{}') tableAsJson.push(rowAsJson); |
@@ -9,3 +9,3 @@ { | ||
"description": "Converts HTML tables to JSON objects", | ||
"version": "1.1.1", | ||
"version": "1.2.0", | ||
"main": "./lib/tabletojson.js", | ||
@@ -32,6 +32,6 @@ "keywords": [ | ||
"devDependencies": { | ||
"config": "^1.31.0", | ||
"config": "^3.2.4", | ||
"coveralls": "^3.0.9", | ||
"eslint": "^5.16.0", | ||
"eslint-plugin-jest": "^21.27.2", | ||
"eslint": "^6.8.0", | ||
"eslint-plugin-jest": "^23.6.0", | ||
"eslint-plugin-prettier": "^3.1.2", | ||
@@ -38,0 +38,0 @@ "jest": "^24.9.0", |
413
README.md
@@ -1,3 +0,7 @@ | ||
[](https://nodei.co/npm/tabletojson/) | ||
# Table to JSON | ||
Attempts to convert HTML tables into JSON. | ||
[](https://nodei.co/npm/tabletojson) | ||
[](https://travis-ci.org/maugenst/tabletojson.svg?branch=master) | ||
@@ -8,12 +12,9 @@ [](https://coveralls.io/github/maugenst/tabletojson?branch=master) | ||
# Table to JSON | ||
Can be passed the markup for a single table as a string, a fragment of HTML or | ||
an entire page or just a URL (with an optional callback function; promises also | ||
supported). | ||
Attempts to convert HTML tables into JSON. | ||
The response is always an array. Every array entry in the response represents a | ||
table found on the page (in the same order they were found in the HTML). | ||
Can be passed the markup for a single table as a string, a fragment of HTML or an entire page or just | ||
a URL (with an optional callback function; promises also supported). | ||
The response is always an array. Every array entry in the response represents a table found on the page | ||
(in the same order they were found in the HTML). | ||
## Basic usage | ||
@@ -23,3 +24,3 @@ | ||
``` | ||
```sh | ||
npm install tabletojson | ||
@@ -41,6 +42,6 @@ ``` | ||
); | ||
``` | ||
### Local (`convert`) | ||
Have a look in the examples. | ||
@@ -62,12 +63,12 @@ | ||
### Duplicate column headings | ||
### Duplicate column headings | ||
If there are duplicate column headings, subsequent headings are suffixed with a count: | ||
If there are duplicate column headings, subsequent headings are suffixed with a | ||
count: | ||
``` | ||
// Table | ||
| PLACE | VALUE | PLACE | VALUE | | ||
| abc | 1 | def | 2 | | ||
PLACE | VALUE | PLACE | VALUE | ||
------|-------|-------|------ | ||
abc | 1 | def | 2 | ||
// Example output | ||
```js | ||
[{ | ||
@@ -79,8 +80,93 @@ PLACE: 'abc', VALUE: '1', | ||
### Tables with headings in the first column | ||
### Tables with rowspan | ||
If a table contains headings in the first column you might get an unexpected result, but you can pass a | ||
second argument with options with `{ useFirstRowForHeadings: true }` to have it treat the first column | ||
as it would any other cell. | ||
Having tables with rowspan, the content of the spanned cell must be available in | ||
the respective object. | ||
<table id="table11" class="table" border="1"> | ||
<thead> | ||
<tr> | ||
<th>Parent</th> | ||
<th>Child</th> | ||
<th>Age</th> | ||
</tr> | ||
</thead> | ||
<tbody> | ||
<tr> | ||
<td rowspan="3">Marry</td> | ||
<td>Sue</td> | ||
<td>15</td> | ||
</tr> | ||
<tr> | ||
<td>Steve</td> | ||
<td>12</td> | ||
</tr> | ||
<tr> | ||
<td>Tom</td> | ||
<td>3</td> | ||
</tr> | ||
</tbody> | ||
</table> | ||
```js | ||
[{ | ||
PARENT: 'Marry', CHILD: 'Tom', AGE: '3', | ||
PARENT: 'Marry', CHILD: 'Steve', AGE: '12', | ||
PARENT: 'Marry', CHILD: 'Sue', AGE: '15' | ||
}] | ||
``` | ||
### Tables with complex rowspan | ||
Having tables with complex rowspans, the content of the spanned cell must be available in the respective object. | ||
<table id="table12" class="table" border="1"> | ||
<thead> | ||
<tr> | ||
<th>Parent</th> | ||
<th>Child</th> | ||
<th>Age</th> | ||
</tr> | ||
</thead> | ||
<tbody> | ||
<tr> | ||
<td rowspan="3">Marry</td> | ||
<td>Sue</td> | ||
<td>15</td> | ||
</tr> | ||
<tr> | ||
<td>Steve</td> | ||
<td>12</td> | ||
</tr> | ||
<tr> | ||
<td rowspan="2">Tom</td> | ||
<td rowspan="2">3</td> | ||
</tr> | ||
<tr> | ||
<td rowspan="2">Taylor</td> | ||
</tr> | ||
<tr> | ||
<td>Peter</td> | ||
<td>17</td> | ||
</tr> | ||
</tbody> | ||
</table> | ||
```js | ||
[{ | ||
PARENT: 'Marry', CHILD: 'Sue', AGE: '15', | ||
PARENT: 'Marry', CHILD: 'Steve', AGE: '12', | ||
PARENT: 'Marry', CHILD: 'Tom', AGE: '3', | ||
PARENT: 'Taylor', CHILD: 'Tom', AGE: '3', | ||
PARENT: 'Taylor', CHILD: 'Peter', AGE: '17' | ||
}] | ||
``` | ||
### Tables with headings in the first column | ||
If a table contains headings in the first column you might get an unexpected | ||
result, but you can pass a second argument with options with | ||
`{ useFirstRowForHeadings: true }` to have it treat the first column as it would | ||
any other cell. | ||
``` javascript | ||
@@ -98,4 +184,4 @@ tabletojson.convertUrl( | ||
The following options are true by default, which converts all values to plain text to give you an easier | ||
more readable object to work with: | ||
The following options are true by default, which converts all values to plain | ||
text to give you an easier more readable object to work with: | ||
@@ -105,4 +191,4 @@ * stripHtmlFromHeadings | ||
If your table contains HTML you want to parse (for example for links) you can set `stripHtmlFromCells` | ||
to `false` to treat it as raw text. | ||
If your table contains HTML you want to parse (for example for links) you can | ||
set `stripHtmlFromCells` to `false` to treat it as raw text. | ||
@@ -114,3 +200,3 @@ ``` javascript | ||
function(tablesAsJson) { | ||
//Print out the 1st row from the 2nd table on the above webpage as JSON | ||
//Print out the 1st row from the 2nd table on the above webpage as JSON | ||
console.log(tablesAsJson[1][0]); | ||
@@ -123,15 +209,17 @@ } | ||
You probably don't need to set `stripHtmlFromHeadings` to false (and setting it to false can make the | ||
results hard to parse), but if you do you can also set both at the same time by setting `stripHtml` to | ||
false. | ||
You probably don't need to set `stripHtmlFromHeadings` to `false` (and setting | ||
it to false can make the results hard to parse), but if you do you can also set | ||
both at the same time by setting `stripHtml` to `false`. | ||
## Options | ||
### request (only `convertUrl`) | ||
If you need to get data from a remote server to pass it to the parser you can call `tabletojson.convertUrl`. | ||
When working behind a proxy you can pass any request-options (proxy, headers,...) by adding a request | ||
object to the options passed to `convertUrl`. | ||
For more information on how to configure request please have a look at: [https://github.com/request/request](https://github.com/request/request) | ||
If you need to get data from a remote server to pass it to the parser you can | ||
call `tabletojson.convertUrl`. When working behind a proxy you can pass any | ||
request-options (proxy, headers,...) by adding a request object to the options | ||
passed to `convertUrl`. For more information on how to configure request please | ||
have a look at | ||
[https://github.com/request/request](https://github.com/request/request) | ||
``` javascript | ||
@@ -147,10 +235,13 @@ tabletojson.convertUrl('https://www.timeanddate.com/holidays/ireland/2017', { | ||
### stripHtmlFromHeadings | ||
Strip any HTML from heading cells. Default is true. | ||
```md | ||
KEY | <b>VALUE</b> | ||
----|------------- | ||
abc | 1 | ||
dev | 2 | ||
``` | ||
// Table | ||
| KEY | <b>VALUE</b> | | ||
| abc | 1 | | ||
| dev | 2 | | ||
```js | ||
// Example output with stripHtmlFromHeadings:true | ||
@@ -161,3 +252,3 @@ [ | ||
}, | ||
{ | ||
{ | ||
KEY: 'dev', VALUE: '2' | ||
@@ -171,3 +262,3 @@ } | ||
}, | ||
{ | ||
{ | ||
KEY: 'dev', '<b>VALUE</b>': '2' | ||
@@ -182,8 +273,10 @@ } | ||
```md | ||
KEY | VALUE | ||
----|--------- | ||
abc | <i>1</i> | ||
dev | <i>2</i> | ||
``` | ||
// Table | ||
| KEY | VALUE | | ||
| abc | <i>1</i> | | ||
| dev | <i>2</i> | | ||
```js | ||
// Example output with stripHtmlFromHeadings:true | ||
@@ -194,3 +287,3 @@ [ | ||
}, | ||
{ | ||
{ | ||
KEY: 'dev', VALUE: '2' | ||
@@ -204,3 +297,3 @@ } | ||
}, | ||
{ | ||
{ | ||
KEY: 'dev', 'VALUE': '<i>2</i>' | ||
@@ -211,7 +304,7 @@ } | ||
### forceIndexAsNumber | ||
### forceIndexAsNumber | ||
Instead of using column text (which can sometimes re-order the data), force an index as a number (string number). | ||
``` javascript | ||
``` json | ||
// Some JSON (Other rows) | ||
@@ -231,11 +324,12 @@ { | ||
### countDuplicateHeadings | ||
Default is 'true'. If set to 'false' duplicate headings will not get a trailing _<NUMBER>. The value of | ||
the field will be the last value found in the table row: | ||
``` | ||
// Table | ||
| PLACE | VALUE | PLACE | VALUE | | ||
| abc | 1 | def | 2 | | ||
| ghi | 3 | jkl | 4 | | ||
Default is `true`. If set to `false`, duplicate headings will not get a trailing | ||
number. The value of the field will be the last value found in the table row: | ||
PLACE | VALUE | PLACE | VALUE | ||
------|-------|-------|------ | ||
abc | 1 | def | 2 | ||
ghi | 3 | jkl | 4 | ||
```js | ||
// Example output with countDuplicateHeadings:false | ||
@@ -246,3 +340,3 @@ [ | ||
}, | ||
{ | ||
{ | ||
PLACE: 'jkl', VALUE: '4' | ||
@@ -254,11 +348,12 @@ } | ||
### ignoreColumns | ||
Array of indexes to be ignored, starting with 0. Default is 'null/undefined'. | ||
``` | ||
// Table | ||
| NAME | PLACE | WEIGHT | SEX | AGE | | ||
| Mel | 1 | 58 | W | 23 | | ||
| Tom | 2 | 78 | M | 54 | | ||
| Bill | 3 | 92 | M | 31 | | ||
NAME | PLACE | WEIGHT | SEX | AGE | ||
------|-------|--------|-----|---- | ||
Mel | 1 | 58 | W | 23 | ||
Tom | 2 | 78 | M | 54 | ||
Bill | 3 | 92 | M | 31 | ||
```js | ||
// Example output with ignoreColumns: [2, 3] | ||
@@ -269,6 +364,6 @@ [ | ||
}, | ||
{ | ||
{ | ||
NAME: 'Tom', PLACE: '2', AGE: '54' | ||
}, | ||
{ | ||
{ | ||
NAME: 'Bill', PLACE: '3', AGE: '31' | ||
@@ -280,12 +375,13 @@ } | ||
### onlyColumns | ||
Array of indexes that are taken, starting with 0. Default is 'null/undefined'. | ||
If given, this option overrides ignoreColumns. | ||
``` | ||
// Table | ||
| NAME | PLACE | WEIGHT | SEX | AGE | | ||
| Mel | 1 | 58 | W | 23 | | ||
| Tom | 2 | 78 | M | 54 | | ||
| Bill | 3 | 92 | M | 31 | | ||
NAME | PLACE | WEIGHT | SEX | AGE | ||
------|-------|--------|-----|---- | ||
Mel | 1 | 58 | W | 23 | ||
Tom | 2 | 78 | M | 54 | ||
Bill | 3 | 92 | M | 31 | ||
```js | ||
// Example output with onlyColumns: [0, 4] | ||
@@ -296,6 +392,6 @@ [ | ||
}, | ||
{ | ||
{ | ||
NAME: 'Tom', AGE: '54' | ||
}, | ||
{ | ||
{ | ||
NAME: 'Bill', AGE: '31' | ||
@@ -307,12 +403,13 @@ } | ||
### ignoreHiddenRows | ||
Indicates if hidden rows (display:none) are ignored. Default is true: | ||
``` | ||
// Table | ||
| NAME | PLACE | WEIGHT | SEX | AGE | | ||
| Mel | 1 | 58 | W | 23 | | ||
| Tom | 2 | 78 | M | 54 | | ||
| Bill | 3 | 92 | M | 31 | | ||
*| Cat | 4 | 4 | W | 2 |* | ||
NAME | PLACE | WEIGHT | SEX | AGE | ||
------|-------|--------|-----|---- | ||
Mel | 1 | 58 | W | 23 | ||
Tom | 2 | 78 | M | 54 | ||
Bill | 3 | 92 | M | 31 | ||
* Cat | 4 | 4 | W | 2* | ||
```js | ||
// Example output with ignoreHiddenRows:true | ||
@@ -323,6 +420,6 @@ [ | ||
}, | ||
{ | ||
{ | ||
NAME: 'Tom', PLACE: '2', WEIGHT: '78', SEX: 'M', AGE: '54' | ||
}, | ||
{ | ||
{ | ||
NAME: 'Bill', PLACE: '3', WEIGHT: '92', SEX: 'M', AGE: '31' | ||
@@ -336,10 +433,10 @@ } | ||
}, | ||
{ | ||
{ | ||
NAME: 'Tom', PLACE: '2', WEIGHT: '78', SEX: 'M', AGE: '54' | ||
}, | ||
{ | ||
{ | ||
NAME: 'Bill', PLACE: '3', WEIGHT: '92', SEX: 'M', AGE: '31' | ||
} | ||
}, | ||
{ | ||
{ | ||
NAME: 'Cat', PLACE: '4', WEIGHT: '4', SEX: 'W', AGE: '2' | ||
@@ -351,16 +448,16 @@ } | ||
### headings | ||
Array of Strings to be used as headings. Default is 'null/undefined'. | ||
If more headings are given than columns exist the overcounting ones will be ignored. If fewer headings | ||
Array of Strings to be used as headings. Default is `null`/`undefined`. | ||
If more headings are given than columns exist the overcounting ones will be ignored. If fewer headings | ||
are given than existing values the overcounting values are ignored. | ||
``` | ||
// Table | ||
| NAME | PLACE | WEIGHT | SEX | AGE | | ||
| Mel | 1 | 58 | W | 23 | | ||
| Tom | 2 | 78 | M | 54 | | ||
| Bill | 3 | 92 | M | 31 | | ||
*| Cat | 4 | 4 | W | 2 |* | ||
NAME | PLACE | WEIGHT | SEX | AGE | ||
------|-------|--------|-----|---- | ||
Mel | 1 | 58 | W | 23 | ||
Tom | 2 | 78 | M | 54 | ||
Bill | 3 | 92 | M | 31 | ||
* Cat | 4 | 4 | W | 2* | ||
```js | ||
// Example output with headings: ['A','B','C','D','E'] | ||
@@ -371,6 +468,6 @@ [ | ||
}, | ||
{ | ||
{ | ||
A: 'Tom', B: '2', C: '78', D: 'M', E: '54' | ||
}, | ||
{ | ||
{ | ||
A: 'Bill', B: '3', C: '92', D: 'M', E: '31' | ||
@@ -384,6 +481,6 @@ } | ||
}, | ||
{ | ||
{ | ||
A: 'Tom', B: '2', C: '78' | ||
}, | ||
{ | ||
{ | ||
A: 'Bill', B: '3', C: '92' | ||
@@ -397,6 +494,6 @@ } | ||
}, | ||
{ | ||
{ | ||
A: 'Tom', B: '2', C: '78', D: 'M', E: '54' | ||
}, | ||
{ | ||
{ | ||
A: 'Bill', B: '3', C: '92', D: 'M', E: '31' | ||
@@ -410,6 +507,6 @@ } | ||
}, | ||
{ | ||
{ | ||
A: 'Tom', B: 'M', C: '54' | ||
}, | ||
{ | ||
{ | ||
A: 'Bill', B: 'M', C: '31' | ||
@@ -422,23 +519,25 @@ } | ||
### limitrows | ||
Number of rows to which the resulting object should be limited. Default is 'null/undefined'. | ||
``` | ||
// Huge Table (see test/tables.html) | ||
| Roleplayer Number | Name | Text to say | | ||
| 0 | Raife Parkinson | re dolor in hendrerit in vulputate ve | | ||
| 1 | Hazel Schultz | usto duo dolores et ea rebum. Ste | | ||
| 2 | Montana Delgado | psum dolor sit amet. Lorem ipsum dolor sit ame | | ||
| 3 | Dianne Mcbride | olor sit amet. Lorem ipsum | | ||
| 4 | Xena Lynch | us est Lorem ipsum dol | | ||
| 5 | Najma Holding | akimata sanctus est Lorem ipsum dolor sit ame | | ||
| 6 | Kiki House | nvidunt ut | | ||
. | ||
. | ||
. | ||
| 197 | Montana Delgado | lores et ea rebum. Stet clita kasd gu | | ||
| 198 | Myrtle Conley | a rebum. Stet clita kasd gubergren, no sea taki | | ||
| 199 | Hanna Ellis | kimata sanctus est Lorem ipsum dolor si | | ||
Number of rows to which the resulting object should be limited. Default is | ||
`null`/`undefined`. | ||
#### Huge Table (see test/tables.html) | ||
// Example output with limitrows: 5 | ||
Roleplayer Number | Name | Text to say | ||
------------------|-----------------|------------ | ||
0 | Raife Parkinson | re dolor in hendrerit in vulputate ve | ||
1 | Hazel Schultz | usto duo dolores et ea rebum. Ste | ||
2 | Montana Delgado | psum dolor sit amet. Lorem ipsum dolor | ||
3 | Dianne Mcbride | sit ame olor sit amet. Lorem ipsum | ||
4 | Xena Lynch | us est Lorem ipsum dol | ||
5 | Najma Holding | akimata sanctus est Lorem ipsum dolor sit | ||
6 | Kiki House | ame nvidunt ut | ||
...| | ||
197 | Montana Delgado | lores et ea rebum. Stet clita kasd gu a | ||
198 | Myrtle Conley | rebum. Stet clita kasd gubergren, no sea | ||
199 | Hanna Ellis | kimata sanctus est Lorem ipsum dolor si | ||
#### Example output with limitrows: 5 | ||
```js | ||
[ { 'Roleplayer Number': '0', | ||
@@ -462,17 +561,21 @@ Name: 'Raife Parkinson', | ||
### containsClasses | ||
Array of classes to find a specific table using this class. Default is 'null/undefined'. | ||
Array of classes to find a specific table using this class. Default is `null`/ | ||
`undefined`. | ||
## Known issues and limitations | ||
This module only supports parsing basic tables with a simple horizontal set of <th></th> headings and | ||
corresponding <td></td> cells. | ||
This module only supports parsing basic tables with a simple horizontal set of | ||
`<th></th>` headings and corresponding `<td></td>` cells. | ||
It can give useless or weird results on tables that have complex structures (such as nested tables) or | ||
multiple headers (such as on both X and Y axis). | ||
It can give useless or weird results on tables that have complex structures | ||
(such as nested tables) or multiple headers (such as on both X and Y axis). | ||
You'll need to handle things like work out which tables to parse and (in most cases) clean up the data. | ||
You might want to combine it with modules like json2csv or CsvToMarkdownTable. | ||
You'll need to handle things like work out which tables to parse and (in most | ||
cases) clean up the data. You might want to combine it with modules like | ||
json2csv or CsvToMarkdownTable. | ||
You might want to use it with a module like 'cheerio' if you want to parse specific tables identified | ||
by id or class (i.e. select them with cheerio and pass the HTML of them as a string). | ||
You might want to use it with a module like 'cheerio' if you want to parse | ||
specific tables identified by id or class (i.e. select them with cheerio and | ||
pass the HTML of them as a string). | ||
@@ -539,6 +642,6 @@ ## Example usage | ||
# Issues | ||
## Issues | ||
Right now the table needs to be "well formatted" to be convertible. Tables in tables will not be | ||
processed. | ||
Right now the table needs to be "well formatted" to be convertible. Tables in | ||
tables will not be processed. | ||
@@ -553,24 +656,36 @@ ```html | ||
# Contributing | ||
## Contributing | ||
Improvements, fixes and suggestions for better written modules that other people have created are welcome, as are bug | ||
reports against specific tables it is unable to handle. | ||
Improvements, fixes and suggestions for better written modules that other people | ||
have created are welcome, as are bug reports against specific tables it is | ||
unable to handle. | ||
You can find basic tests in the test folder. I implemented the most straight forward way in using the library. Nonetheless | ||
there are some edge cases that need to be tested and I would like to ask for support here. Feel free to fork and create | ||
PRs here. Every bit of help is appreciated. | ||
You can find basic tests in the test folder. I implemented the most straight | ||
forward way in using the library. Nonetheless there are some edge cases that | ||
need to be tested and I would like to ask for support here. Feel free to fork | ||
and create PRs here. Every bit of help is appreciated. | ||
To get also an insight you can use Iain's examples located in the example folder included with this project that shows | ||
usage and would be a good start. | ||
To get also an insight you can use Iain's examples located in the example folder | ||
included with this project that shows usage and would be a good start. | ||
If you submit a pull request, please add an example for your use case, so I can understand what you want it to do (as I | ||
want to get around to writing tests for this and want to understand the sort of use cases people have). | ||
If you submit a pull request, please add an example for your use case, so I can | ||
understand what you want it to do (as I want to get around to writing tests for | ||
this and want to understand the sort of use cases people have). | ||
# Thanks | ||
## Thanks | ||
June 2018 - Very special thanks to the originator of the library, Iain Collins (@iaincollins). Without his investigation in website | ||
grasping and mastering cheerio this lib would have not been where it is right now. Also I would personally like to say | ||
"Thank you" for your trust in passing me the ownership. @maugenst | ||
June 2018 - Very special thanks to the originator of the library, Iain Collins | ||
(@iaincollins). Without his investigation in website grasping and mastering | ||
cheerio this lib would have not been where it is right now. Also I would | ||
personally like to say "Thank you" for your trust in passing me the ownership. | ||
Marius (@maugenst) | ||
Additional thanks to @roryok, Max Thyen (@maxthyen), Thor Jacobsen (@twjacobsen) and Michael Keller (@mhkeller) for | ||
improvements and bug fixes. | ||
Additional thanks to | ||
* @roryok | ||
* Max Thyen (@maxthyen) | ||
* Thor Jacobsen (@twjacobsen) | ||
* Michael Keller (@mhkeller) | ||
* Jesús Leganés-Combarro (@piranna) | ||
for improvements and bug fixes. |
@@ -523,2 +523,7 @@ 'use strict'; | ||
// ENHANCEMENT: Coverage improvement to also cover rowspan tables | ||
// | PARENT | CHILD | AGE | | ||
// | | Sue | 15 | | ||
// | Marry | Steve | 12 | | ||
// | | Tom | 3 | | ||
it('Rowspan usage leads to correct object representation', async function() { | ||
@@ -535,8 +540,50 @@ const converted = await tabletojson.convert(html, { | ||
expect(_.has(table[0], 'Parent')).toBeTruthy(); | ||
expect(table.length).toBe(3); | ||
expect(table[0].Parent).toBe('Martha'); | ||
expect(table[1].Parent).toBe('Martha'); | ||
expect(table[1].Parent).toBe('Martha'); | ||
expect(table[0].Parent).toBe('Marry'); | ||
expect(table[1].Parent).toBe('Marry'); | ||
expect(table[2].Parent).toBe('Marry'); | ||
}); | ||
// ENHANCEMENT: Coverage improvement to also cover complex rowspan tables | ||
// | PARENT | CHILD | AGE | | ||
// +--------+-------+-----+ | ||
// | | Sue | 15 | | ||
// + +-------+-----+ | ||
// | Marry | Steve | 12 | | ||
// + +-------+-----+ | ||
// | | | | | ||
// +--------+ Tom | 3 + | ||
// | | | | | ||
// + Taylor +-------+-----+ | ||
// | | Peter | 17 | | ||
// +--------+-------+-----+ | ||
it('Complex rowspan usage leads to correct object representation', async function() { | ||
const converted = await tabletojson.convert(html, { | ||
id: ['table12'] | ||
}); | ||
expect(converted).toBeDefined(); | ||
expect(converted.length).toBe(1); | ||
const table = converted[0]; | ||
expect(table.length).toBe(5); | ||
expect(_.has(table[0], 'Parent')).toBeTruthy(); | ||
expect(table[0].Parent).toBe('Marry'); | ||
expect(table[1].Parent).toBe('Marry'); | ||
expect(table[2].Parent).toBe('Marry'); | ||
expect(table[3].Parent).toBe('Taylor'); | ||
expect(table[4].Parent).toBe('Taylor'); | ||
expect(table[0].Child).toBe('Sue'); | ||
expect(table[1].Child).toBe('Steve'); | ||
expect(table[2].Child).toBe('Tom'); | ||
expect(table[3].Child).toBe('Tom'); | ||
expect(table[4].Child).toBe('Peter'); | ||
expect(table[0].Age).toBe('15'); | ||
expect(table[1].Age).toBe('12'); | ||
expect(table[2].Age).toBe('3'); | ||
expect(table[3].Age).toBe('3'); | ||
expect(table[4].Age).toBe('17'); | ||
}); | ||
it('Options: containsClasses', async function() { | ||
@@ -543,0 +590,0 @@ const converted = await tabletojson.convert(html, { |
Sorry, the diff of this file is not supported yet
116349
1299
658