Basic usage
const {fetchBySelector,utils} = require('crawler-lian');
// Simplest form: request `uri` with a single selector/attr pair and print
// whatever the returned promise resolves with.
// A rejection handler is attached so a failed request is reported instead of
// surfacing as an unhandled promise rejection.
fetchBySelector(uri, { selector: 'a', attr: 'href' })
  .then(console.log)
  .catch(console.error);
// Grouped form: pass a `groups` array instead of a single selector/attr pair.
// This example defines one group named 'list' and prints the resolved result.
// NOTE(review): the exact semantics of each option key are defined by
// crawler-lian; the comments below describe only what this snippet does.
fetchBySelector(uri, {
  groups: [
    {
      groupName: 'list',
      el: '.s_position_list > .item_con_list> .con_list_item',
      // Settings from ./list are spread here; keys written after this line
      // override any same-named key coming from that module.
      ...require('./list'),
      selectors: [
        {
          selector: '.position_link',
          attr: 'href',
          name: 'detail_url',
        },
        {
          selector: '.format-time',
          attr: 'text',
          name: 'time',
          // Selector-level handler: converts the raw value with parseTime.
          handler({ value }) {
            return parseTime(value);
          },
        },
      ],
      // Group-level handler: strips whitespace via utils.removeSpace.
      handler({ value }) {
        return utils.removeSpace(value);
      },
      // Returns the first entry of `matchs`, or undefined when it is
      // missing or empty.
      process({ matchs }) {
        if (matchs && matchs.length > 0) {
          return matchs[0];
        }
        return undefined;
      },
      // Enriches an item with fields fetched from its detail page.
      // Returns `data` unchanged when it has no detail_url; otherwise returns
      // a promise for `data` merged with the detail page's data.
      itemProcess({ data }) {
        const detailUrl = data.detail_url;
        if (!detailUrl) {
          return data;
        }
        // Return the chain directly rather than wrapping it in `new Promise`,
        // and fall back to the list-level data on failure so the returned
        // promise always settles instead of hanging.
        return fetchBySelector(detailUrl, detailOptions)
          .then(({ data: detailData }) => ({ ...data, ...detailData }))
          .catch((err) => {
            console.log(err);
            return data;
          });
      },
    },
  ],
})
  .then(console.log)
  .catch(console.error);
// Option object listing every key used in this example, with the values shown
// here. NOTE(review): these look like the library defaults — confirm against
// the crawler-lian documentation. `groups` refers to a variable that must be
// defined by the surrounding code.
const option = {
  deDuplication: false,
  selector: 'a',
  attr: 'text',
  trim: true,
  handler: null,
  process: null,
  test: null,
  filter: null,
  groups,
  itemProcess: null,
};