htmlparser2
Advanced tools
Comparing version 2.1.3 to 2.2.0
//Types of elements found in the DOM | ||
module.exports = { | ||
Text: "text", //Plain text | ||
Directive: "directive", //Special tag <!...> | ||
Comment: "comment", //Special tag <!--...--> | ||
Script: "script", //Special tag <script>...</script> | ||
Style: "style", //Special tag <style>...</style> | ||
Tag: "tag", //Any tag that isn't special | ||
CDATA: "cdata" | ||
Text: "text", //Text | ||
Directive: "directive", //<? ... ?> | ||
Comment: "comment", //<!-- ... --> | ||
Script: "script", //<script> tags | ||
Style: "style", //<style> tags | ||
Tag: "tag", //Any tag | ||
CDATA: "cdata" //<![CDATA[ ... ]]> | ||
}; |
@@ -1,2 +0,2 @@ | ||
var DefaultHandler = require("./DefaultHandler.js"), | ||
var DomHandler = require("./DomHandler.js"), | ||
DomUtils = require("./DomUtils.js"); | ||
@@ -9,5 +9,5 @@ | ||
require("util").inherits(FeedHandler, DefaultHandler); | ||
require("util").inherits(FeedHandler, DomHandler); | ||
FeedHandler.prototype.init = DefaultHandler; | ||
FeedHandler.prototype.init = DomHandler; | ||
@@ -20,3 +20,3 @@ function getElements(what, where, one, recurse){ | ||
var ret = getElements(what, where, true, recurse); | ||
if(ret && (ret = ret.children) && ret.length > 0) return ret[0].data; | ||
if(ret && (ret = ret.children) && ret.length !== 0) return ret[0].data; | ||
else return false; | ||
@@ -105,5 +105,5 @@ } | ||
this.dom = feed; | ||
DefaultHandler.prototype._handleCallback.call(this); | ||
DomHandler.prototype._handleCallback.call(this); | ||
}; | ||
module.exports = FeedHandler; |
@@ -0,22 +1,36 @@ | ||
var defineProp = Object.defineProperty; | ||
module.exports = { | ||
get Parser(){ | ||
Object.defineProperty(this, "Parser", {value:require("./Parser.js")}); | ||
defineProp(this, "Parser", {value:require("./Parser.js")}); | ||
return this.Parser; | ||
}, | ||
get DefaultHandler(){ | ||
Object.defineProperty(this, "DefaultHandler", {value:require("./DefaultHandler.js")}); | ||
return this.DefaultHandler; | ||
get DomHandler(){ | ||
defineProp(this, "DomHandler", {value:require("./DomHandler.js")}); | ||
return this.DomHandler; | ||
}, | ||
get FeedHandler(){ | ||
Object.defineProperty(this, "FeedHandler", {value:require("./FeedHandler.js")}); | ||
defineProp(this, "FeedHandler", {value:require("./FeedHandler.js")}); | ||
return this.FeedHandler; | ||
}, | ||
get ElementType(){ | ||
Object.defineProperty(this, "ElementType", {value:require("./ElementType.js")}); | ||
defineProp(this, "ElementType", {value:require("./ElementType.js")}); | ||
return this.ElementType; | ||
}, | ||
get Stream(){ | ||
defineProp(this, "Stream", {value:require("./Stream.js")}); | ||
return this.Stream; | ||
}, | ||
get DomUtils(){ | ||
Object.defineProperty(this, "DomUtils", {value:require("./DomUtils.js")}); | ||
defineProp(this, "DomUtils", {value:require("./DomUtils.js")}); | ||
return this.DomUtils; | ||
}, | ||
get DefaultHandler(){ | ||
defineProp(this, "DefaultHandler", {value: this.DomHandler}); | ||
return this.DefaultHandler; | ||
}, | ||
get RssHandler(){ | ||
defineProp(this, "RssHandler", {value: this.FeedHandler}); | ||
return this.FeedHandler; | ||
} | ||
} |
@@ -13,2 +13,3 @@ var ElementType = require("./ElementType.js"); | ||
this._done = false; | ||
this._paused = false; | ||
} | ||
@@ -52,3 +53,3 @@ | ||
this._buffer += data; //FIXME: this can be a bottleneck | ||
this._parseTags(); | ||
if(!this._paused) this._parseTags(); | ||
}; | ||
@@ -62,3 +63,7 @@ | ||
this._done = true; | ||
if(!this._paused) this._finishParsing(); | ||
}; | ||
Parser.prototype._finishParsing = function(){ | ||
//Parse the buffer to its end | ||
@@ -74,2 +79,12 @@ if(this._buffer) this._parseTags(true); | ||
//Pauses the parser by setting the _paused flag. While the flag is set, the
//`!this._paused` guards elsewhere in this file skip _parseTags()/_finishParsing()
//and the tag loop in _parseTags stops iterating.
//Has no effect once _done has been set.
Parser.prototype.pause = function(){ | ||
if(!this._done) this._paused = true; | ||
}; | ||
//Clears the _paused flag and parses whatever is currently in the buffer.
//When _done is already set, _finishParsing() is called as well (that call is
//skipped while the parser is paused).
Parser.prototype.resume = function(){ | ||
this._paused = false; | ||
this._parseTags(); | ||
if(this._done) this._finishParsing(); | ||
}; | ||
//Resets the parser to a blank state, ready to parse a new HTML document | ||
@@ -107,2 +122,7 @@ Parser.prototype.reset = function(){ | ||
//Numeric value for each special tag name. When a closing tag is handled, the
//value for its name is compared against this._contentFlags and, if the flags
//are at least that large, subtracted from them to remove the flag.
var TagValues = { | ||
style: 1, | ||
script: 2 | ||
}; | ||
//Parses through HTML text and returns an array of found elements | ||
@@ -119,3 +139,4 @@ Parser.prototype._parseTags = function(force){ | ||
while(opening !== closing){ //just false if both are -1 | ||
//opening !== closing is just false if both are -1 | ||
while(opening !== closing && !this._paused){ | ||
lastTagSep = this._tagSep; | ||
@@ -159,10 +180,6 @@ | ||
//if it's a closing tag, remove the flag | ||
if(this._contentFlags === SpecialTags[ElementType.Script] && elementData === "script"){ | ||
//remove the script flag | ||
this._contentFlags -= SpecialTags[ElementType.Script]; | ||
if(this._contentFlags >= TagValues[elementData]){ | ||
//remove the flag | ||
this._contentFlags -= TagValues[elementData]; | ||
} | ||
else if(this._contentFlags === SpecialTags[ElementType.Style] && elementData === "style"){ | ||
//remove the style flag | ||
this._contentFlags -= SpecialTags[ElementType.Style]; | ||
} | ||
else { | ||
@@ -193,3 +210,3 @@ this._writeSpecial(rawData, lastTagSep); | ||
else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep); | ||
//This tag is a directive | ||
//TODO: This isn't a processing instruction, needs a new name | ||
else if(this._cbs.onprocessinginstruction){ | ||
@@ -280,2 +297,8 @@ this._cbs.onprocessinginstruction( | ||
//Reports every attribute matched in `data` through the onattribute callback.
//The callback receives the name (capture group 1) and the first truthy value
//among capture groups 2-4; if none is truthy, the name itself is passed as the value.
//The callback is invoked unconditionally here; the caller checks that it exists.
//NOTE(review): _reAttrib is defined outside this view - the loop presumably relies
//on it being a global regex so that exec() advances on each iteration; confirm.
Parser.prototype._parseAttributes = function(data){ | ||
for(var match; match = _reAttrib.exec(data);){ | ||
this._cbs.onattribute(match[1], match[2] || match[3] || match[4] || match[1]); | ||
} | ||
}; | ||
Parser.prototype._processOpenTag = function(name, data){ | ||
@@ -287,5 +310,11 @@ var type = ElementType.Tag; | ||
if(this._cbs.onopentagname){ | ||
this._cbs.onopentagname(name); | ||
} | ||
if(this._cbs.onopentag){ | ||
this._cbs.onopentag(name, parseAttributes(data), type); | ||
} | ||
if(this._cbs.onattribute){ | ||
this._parseAttributes(data); | ||
} | ||
@@ -292,0 +321,0 @@ //If tag self-terminates, add an explicit, separate closing tag |
{ | ||
"name": "htmlparser2", | ||
"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.", | ||
"version": "2.1.3", | ||
"description": "Performance-optimized forgiving HTML/XML/RSS parser", | ||
"version": "2.2.0", | ||
"author": "Felix Boehm <me@feedic.com>", | ||
"keywords": ["html", "parser", "xml", "dom", "rss", "feed", "atom"], | ||
"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"], | ||
"contributors": ["Chris Winberry <chris@winberry.net>"], | ||
@@ -21,3 +21,3 @@ "repository": { | ||
"scripts": { | ||
"test": "node --harmony_proxies tests/00-runtests.js" | ||
"test": "node tests/00-runtests.js" | ||
}, | ||
@@ -24,0 +24,0 @@ "engines": "node >= 0.3.0", |
@@ -1,2 +0,3 @@ | ||
#htmlparser2 | ||
#htmlparser2 [![Build Status](https://secure.travis-ci.org/FB55/node-htmlparser.png)](http://travis-ci.org/FB55/node-htmlparser) | ||
A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output. | ||
@@ -7,30 +8,31 @@ | ||
##Running Tests | ||
node tests/00-runtests.js | ||
##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)? | ||
This is a fork of the project above. The main difference is that this is just intended to be used with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplication and is remarkably faster than the original. | ||
This project is linked to [Travis CI](http://travis-ci.org/). The latest build status is: | ||
The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally intended for [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)). | ||
[![Build Status](https://secure.travis-ci.org/FB55/node-htmlparser.png)](http://travis-ci.org/FB55/node-htmlparser) | ||
The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose) (if you really need it, for whatever reason that may be). | ||
##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)? | ||
This is a fork of the project above. The main difference is that this is just intended to be used with node. Besides, the code is much better structured, has less duplication and is remarkably faster than the original. | ||
The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected. | ||
Besides, the parser now provides the interface of [sax.js](https://github.com/isaacs/sax-js) (originally intended for my readability port [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)). | ||
The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose) (if you really need it, for whatever reason that may be). | ||
##Usage | ||
```javascript | ||
var htmlparser = require("htmlparser"); | ||
var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->"; | ||
var handler = new htmlparser.DefaultHandler(function (error, dom) { | ||
if (error) | ||
[...do something for errors...] | ||
else | ||
[...parsing done, do something...] | ||
console.log(dom); | ||
var htmlparser = require("htmlparser2"); | ||
var parser = new htmlparser.Parser({ | ||
onopentag: function(name, attribs){ | ||
if(name === "script" && attribs["language"] === "javascript"){ | ||
console.log("JS! Hooray!"); | ||
} | ||
}, | ||
ontext: function(text){ | ||
console.log("-->", text); | ||
}, | ||
onclosetag: function(tagname){ | ||
if(tagname === "script"){ | ||
console.log("That's it?!"); | ||
} | ||
} | ||
}); | ||
var parser = new htmlparser.Parser(handler); | ||
parser.write(rawHtml); | ||
parser.write("Xyz <script language= javascript>var foo = '<<bar>>';< / script>"); | ||
parser.done(); | ||
@@ -42,30 +44,15 @@ ``` | ||
```javascript | ||
[{ | ||
data: 'Xyz ', | ||
type: 'text' | ||
}, { | ||
type: 'script', | ||
name: 'script', | ||
attribs: { | ||
language: 'javascript' | ||
}, | ||
children: [{ | ||
data: 'var foo = \'<bar>\';<', | ||
type: 'text' | ||
}] | ||
}, { | ||
data: '<!-- Waah! -- ', | ||
type: 'comment' | ||
}] | ||
--> Xyz | ||
JS! Hooray! | ||
--> var foo = '<<bar>>'; | ||
That's it?! | ||
``` | ||
##Streaming To Parser | ||
```javascript | ||
while (...) { | ||
... | ||
parser.write(chunk); | ||
} | ||
parser.done(); | ||
``` | ||
Read more about the parser in the [wiki](https://github.com/FB55/node-htmlparser/wiki/Parser-options). | ||
##Get a DOM | ||
The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that may be manipulated using the `DomUtils` helper. | ||
Read more about the DomHandler in the [wiki](https://github.com/FB55/node-htmlparser/wiki/DomHandler). | ||
##Parsing RSS/RDF/Atom Feeds | ||
@@ -77,6 +64,2 @@ | ||
}); | ||
``` | ||
##Further reading | ||
* [Parser options](https://github.com/FB55/node-htmlparser/wiki/Parser-options) | ||
* [DefaultHandler options](https://github.com/FB55/node-htmlparser/wiki/DefaultHandler-options) | ||
``` |
var fs = require("fs"); | ||
var testCount = 0, | ||
failCount = 0, | ||
totalTime = 0; | ||
var runCount = 0, | ||
testCount = 0, | ||
failCount = 0; | ||
@@ -17,6 +17,6 @@ function runTests(test){ | ||
var second = false, | ||
failed = false, | ||
start = Date.now(), | ||
took = 0; | ||
failed = false; | ||
runCount++; | ||
console.log("Testing:", file.name); | ||
@@ -26,3 +26,2 @@ | ||
if(err) console.log("Handler error:", err); | ||
took += Date.now() - start; | ||
@@ -36,9 +35,8 @@ var expected = JSON.stringify(file.expected, null, 2), | ||
start = Date.now(); | ||
if(second){ | ||
testCount+=1; | ||
if(failed) failCount+=1; | ||
runCount--; | ||
testCount++; | ||
if(failed) failCount++; | ||
console.log("["+file.name+"]:",failed?"failed":"passed","(took",took,"ms)"); | ||
console.log("["+file.name+"]:", failed ? "failed":"passed"); | ||
} | ||
@@ -48,19 +46,25 @@ else second = true; | ||
}); | ||
var took = Date.now()-begin; | ||
totalTime+=took; | ||
console.log(test.dir,"took",took); | ||
console.log("->", test.dir.slice(1, -1), "iterated"); | ||
}; | ||
//run all tests | ||
["./01-html.js", "./02-feed.js", "./03-events.js", "./04-dom_utils.js"] | ||
.map(require) | ||
.forEach(runTests); | ||
[ | ||
"./01-html.js", | ||
"./02-feed.js", | ||
"./03-events.js", | ||
"./04-dom_utils.js", | ||
"./05-stream.js" | ||
].map(require).forEach(runTests); | ||
//log the results | ||
console.log("Total time:", totalTime); | ||
console.log("Total tests:", testCount); | ||
console.log("Failed tests:", failCount); | ||
if(failCount !== 0){ | ||
throw Error("Encountered " + failCount + " errors!"); | ||
} | ||
//Waits for all started tests to report before printing the summary:
//runCount is incremented when a test starts and decremented when its result is
//recorded, so while it is non-zero this function re-schedules itself every 50ms.
//Once it reaches zero the totals are logged and an Error is thrown if any test
//failed. NOTE(review): presumably needed because some tests finish asynchronously.
(function check(){ | ||
if(runCount !== 0){ | ||
return setTimeout(check, 50); | ||
} | ||
console.log("Total tests:", testCount); | ||
console.log("Failed tests:", failCount); | ||
if(failCount !== 0){ | ||
throw Error("Encountered " + failCount + " errors!"); | ||
} | ||
})(); |
//Runs tests for HTML | ||
var helper = require("./test-helper.js"), | ||
DefaultHandler = require("../lib/DefaultHandler.js"); | ||
DefaultHandler = require("../lib/DomHandler.js"); | ||
@@ -6,0 +6,0 @@ exports.dir = "/HTML/"; |
//generate a dom | ||
var handler = new (require("../lib/DefaultHandler.js"))(); | ||
var handler = new (require("../lib/DomHandler.js"))(); | ||
@@ -4,0 +4,0 @@ (new (require("../lib/Parser.js"))(handler)).parseComplete( |
@@ -6,2 +6,8 @@ exports.name = "simple"; | ||
{ | ||
"event": "opentagname", | ||
"data": [ | ||
"h1" | ||
] | ||
}, | ||
{ | ||
"event": "opentag", | ||
@@ -17,2 +23,9 @@ "data": [ | ||
{ | ||
"event": "attribute", | ||
"data": [ | ||
"class", | ||
"test" | ||
] | ||
}, | ||
{ | ||
"event": "text", | ||
@@ -19,0 +32,0 @@ "data": [ |
@@ -6,2 +6,8 @@ exports.name = "Template script tags"; | ||
{ | ||
"event": "opentagname", | ||
"data": [ | ||
"script" | ||
] | ||
}, | ||
{ | ||
"event": "opentag", | ||
@@ -17,2 +23,9 @@ "data": [ | ||
{ | ||
"event": "attribute", | ||
"data": [ | ||
"type", | ||
"text/template" | ||
] | ||
}, | ||
{ | ||
"event": "text", | ||
@@ -19,0 +32,0 @@ "data": [ |
@@ -1,2 +0,2 @@ | ||
exports.name = "simple"; | ||
exports.name = "Lowercase tags"; | ||
exports.options = {handler: {}, parser: {lowerCaseTags:true}}; | ||
@@ -6,2 +6,8 @@ exports.html = "<H1 class=test>adsf</H1>"; | ||
{ | ||
"event": "opentagname", | ||
"data": [ | ||
"h1" | ||
] | ||
}, | ||
{ | ||
"event": "opentag", | ||
@@ -17,2 +23,9 @@ "data": [ | ||
{ | ||
"event": "attribute", | ||
"data": [ | ||
"class", | ||
"test" | ||
] | ||
}, | ||
{ | ||
"event": "text", | ||
@@ -19,0 +32,0 @@ "data": [ |
@@ -1,2 +0,2 @@ | ||
exports.name = "simple"; | ||
exports.name = "CDATA"; | ||
exports.options = {handler: {}, parser: {}}; | ||
@@ -6,2 +6,8 @@ exports.html = "<tag><![CDATA[ asdf ><asdf></adsf><> fo]]></tag>"; | ||
{ | ||
"event": "opentagname", | ||
"data": [ | ||
"tag" | ||
] | ||
}, | ||
{ | ||
"event": "opentag", | ||
@@ -8,0 +14,0 @@ "data": [ |
@@ -9,50 +9,3 @@ exports.name = "RSS (2.0)"; | ||
exports.type = "rss"; | ||
//http://cyber.law.harvard.edu/rss/examples/rss2sample.xml | ||
exports.html = '<?xml version="1.0"?>\ | ||
<rss version="2.0">\ | ||
<channel>\ | ||
<title>Liftoff News</title>\ | ||
<link>http://liftoff.msfc.nasa.gov/</link>\ | ||
<description>Liftoff to Space Exploration.</description>\ | ||
<language>en-us</language>\ | ||
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>\ | ||
\ | ||
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>\ | ||
<docs>http://blogs.law.harvard.edu/tech/rss</docs>\ | ||
<generator>Weblog Editor 2.0</generator>\ | ||
<managingEditor>editor@example.com</managingEditor>\ | ||
<webMaster>webmaster@example.com</webMaster>\ | ||
<item>\ | ||
\ | ||
<title>Star City</title>\ | ||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>\ | ||
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia\'s <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>\ | ||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>\ | ||
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>\ | ||
\ | ||
</item>\ | ||
<item>\ | ||
<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.</description>\ | ||
<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>\ | ||
<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>\ | ||
\ | ||
</item>\ | ||
<item>\ | ||
<title>The Engine That Does More</title>\ | ||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>\ | ||
<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>\ | ||
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>\ | ||
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>\ | ||
\ | ||
</item>\ | ||
<item>\ | ||
<title>Astronauts\' Dirty Laundry</title>\ | ||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>\ | ||
<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>\ | ||
<pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>\ | ||
<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>\ | ||
\ | ||
</item>\ | ||
</channel>\ | ||
</rss>'; | ||
exports.html = require("fs").readFileSync(__dirname+"/../Documents/RSS_Example.xml").toString(); | ||
exports.expected = { | ||
@@ -59,0 +12,0 @@ type: "rss", |
@@ -9,29 +9,3 @@ exports.name = "Atom (1.0)"; | ||
exports.type = "rss"; | ||
//http://en.wikipedia.org/wiki/Atom_%28standard%29 | ||
exports.html = '<?xml version="1.0" encoding="utf-8"?>\ | ||
\ | ||
<feed xmlns="http://www.w3.org/2005/Atom">\ | ||
\ | ||
<title>Example Feed</title>\ | ||
<subtitle>A subtitle.</subtitle>\ | ||
<link href="http://example.org/feed/" rel="self" />\ | ||
<link href="http://example.org/" />\ | ||
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>\ | ||
<updated>2003-12-13T18:30:02Z</updated>\ | ||
<author>\ | ||
<name>John Doe</name>\ | ||
<email>johndoe@example.com</email>\ | ||
</author>\ | ||
\ | ||
<entry>\ | ||
<title>Atom-Powered Robots Run Amok</title>\ | ||
<link href="http://example.org/2003/12/13/atom03" />\ | ||
<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>\ | ||
<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>\ | ||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>\ | ||
<updated>2003-12-13T18:30:02Z</updated>\ | ||
<summary>Some text.</summary>\ | ||
</entry>\ | ||
\ | ||
</feed>'; | ||
exports.html = require("fs").readFileSync(__dirname+"/../Documents/Atom_Example.xml").toString(); | ||
exports.expected = { | ||
@@ -38,0 +12,0 @@ type: "atom", |
@@ -9,3 +9,3 @@ exports.name = "RDF test"; | ||
exports.html = '<?xml version="1.0" encoding="UTF-8"?>\n<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">\n\t<channel rdf:about="http://sfbay.craigslist.org/ccc/">\n\t\t<title>craigslist | all community in SF bay area</title>\n\t\t<link>http://sfbay.craigslist.org/ccc/</link>\n\t\t<description/>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:publisher>webmaster@craigslist.org</dc:publisher>\n\t\t<dc:creator>webmaster@craigslist.org</dc:creator>\n\t\t<dc:source>http://sfbay.craigslist.org/ccc//</dc:source>\n\t\t<dc:title>craigslist | all community in SF bay area</dc:title>\n\t\t<dc:type>Collection</dc:type>\n\t\t<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>\n\t\t<syn:updateFrequency>4</syn:updateFrequency>\n\t\t<syn:updatePeriod>hourly</syn:updatePeriod>\n\t\t<items>\n\t\t\t<rdf:Seq>\n\t\t\t\t<rdf:li rdf:resource="http://sfbay.craigslist.org/sby/muc/2681301534.html"/>\n\t\t\t</rdf:Seq>\n\t\t</items>\n\t</channel>\n\t<item rdf:about="http://sfbay.craigslist.org/sby/muc/2681301534.html">\n\t\t<title><![CDATA[ Music Equipment Repair and Consignment ]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</link>\n\t\t<description><![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. 
Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href="http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html" rel="nofollow">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:35:17-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</dc:source>\n\t\t<dc:title><![CDATA[ Music Equipment Repair and Consignment ]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>\n\t</item>\n\t<item rdf:about="http://sfbay.craigslist.org/eby/rid/2685010755.html">\n\t\t<title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</link>\n\t\t<description><![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. 
Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:34:54-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</dc:source>\n\t\t<dc:title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>\n\t</item>\n</rdf:RDF>'; | ||
exports.html = require("fs").readFileSync(__dirname+"/../Documents/RDF_Example.xml").toString(); | ||
@@ -12,0 +12,0 @@ exports.expected = { |
@@ -6,3 +6,3 @@ exports.name = "Basic test"; | ||
}; | ||
exports.html = "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>"; | ||
exports.html = require("fs").readFileSync(__dirname + "/../Documents/Basic.html").toString(); | ||
exports.expected = [ | ||
@@ -9,0 +9,0 @@ { |
@@ -15,2 +15,2 @@ var Parser = require("../lib/Parser.js"), | ||
exports.EVENTS = ["cdatastart", "cdataend", "text", "processinginstruction", "comment", "commentend", "closetag", "opentag"/*, "error", "end"*/]; | ||
exports.EVENTS = ["attribute", "cdatastart", "cdataend", "text", "processinginstruction", "comment", "commentend", "closetag", "opentag", "opentagname"/*, "error", "end"*/]; |
75590
59
2963
62
7