Comparing version 0.3.7 to 0.3.8
@@ -156,3 +156,3 @@ "use strict"; | ||
try { | ||
html = html.replace(/<\s*(\w+).*?>/g, '<$1>'); | ||
html = html.replace(/<([a-z][a-z0-9]*)(?:[^>]*(\s(?:src|href)=['\"][^'\"]*['\"]))?[^>]*?(\/?)>/ig, '<$1$2$3>'); | ||
} catch (err) {} | ||
@@ -159,0 +159,0 @@ } |
{ | ||
"name": "read-art", | ||
"version": "0.3.7", | ||
"version": "0.3.8", | ||
"description": "Scrape/Crawl article from any site automatically. Make any web page readable, no matter Chinese or English.", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -34,3 +34,3 @@ var read = require('../'), | ||
it('should have no attributes on element',function(done){ | ||
read('<title>文章抓取</title><body><div><p style="font-size: 12px" data-id="10000">Hola!!!! Real Madrid!!!!!!!!!!!!!!</p></div></body>', {tidyAttrs: true}, function(err, art){ | ||
read('<title>TIDY!!!!!</title><body><div><p style="font-size: 12px" data-id="10000">Hola!!!! <a href="real_madrid.htm">Real Madrid</a>!!!!!!!!!!!!!!<img src="real_madrid.jpg" /></p></div></body>', {tidyAttrs: true}, function(err, art){ | ||
should.not.exist(err); | ||
@@ -41,2 +41,4 @@ expect(art).to.be.an('object'); | ||
art.content.should.contain('Real Madrid'); | ||
art.content.should.contain(' href="real_madrid.htm"'); | ||
art.content.should.contain(' src="real_madrid.jpg"'); | ||
done(); | ||
@@ -43,0 +45,0 @@ }); |
57994
1384