Comparing version 0.6.3 to 0.7.0
# CHANGES to httpquery | ||
## 0.7.0 | ||
- feat: Use any `req.jsonData` set by middleware | ||
- refactor: Improve code readability | ||
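The `req.jsonData` feature lets a wrapping layer hand the handler already-parsed JSON so it skips reading a static file from disk. A minimal sketch of the idea; the export name `httpquery` and the listener setup are hypothetical, since they are not part of this diff:

```js
import http from 'http';
// Hypothetical import: the actual export name/path of this package's handler
// is not shown in this diff.
import {httpquery} from 'httpquery';

http.createServer((req, res) => {
  // "Middleware" step: attach already-parsed JSON. With `req.jsonData` set,
  // the 0.7.0 handler evaluates `query-jsonata` against this object instead
  // of reading a static file from disk.
  req.jsonData = {users: [{name: 'Ada', age: 36}, {name: 'Grace', age: 28}]};
  httpquery(req, res);
}).listen(8080);
```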
## 0.6.3 | ||
@@ -4,0 +9,0 @@ |
@@ -1,3 +0,4 @@ | ||
// Todo: Make this integratable into a pipeline; ensure can use HTML or XML DOM with content-type accordingly | ||
// Use JSDOM or http://zombie.labnotes.org/ ? | ||
// Todo: More middleware passing options besides jsonData; ensure can use | ||
// HTML or XML DOM with content-type accordingly | ||
// Use JSDOM? | ||
@@ -15,9 +16,111 @@ import {readFile} from 'fs/promises'; | ||
res.writeHead(code, responseHeaders); | ||
res.end(fileContents); // + '\n' | ||
res.end(fileContents); // + '\n' | ||
}; | ||
const clientSupportCheck = (req, str) => { | ||
return req.headers['query-client-support'] && | ||
req.headers['query-client-support'].trim().split(/\s+/u).includes(str); | ||
req.headers['query-client-support'].trim().split( | ||
/\s+/u | ||
).includes(str); | ||
}; | ||
const handleJsonata = ({ | ||
req, res, responseHeaders, fileContents, exitError, finish | ||
}) => { | ||
const jsonataExpression = jsonata( | ||
req.headers['query-jsonata'].trim() | ||
); | ||
const bindings = req.headers['query-bindings']?.trim(); | ||
jsonataExpression.evaluate( | ||
'jsonData' in req | ||
? req.jsonData | ||
: JSON.parse(fileContents.toString('utf8')), | ||
bindings | ||
? JSON.parse(bindings) | ||
: {}, | ||
// eslint-disable-next-line promise/prefer-await-to-callbacks -- jsonata | ||
(error, result) => { | ||
if (error) { | ||
exitError(res, responseHeaders, error.message); | ||
return; | ||
} | ||
const queryResult = JSON.stringify(result); | ||
finish(queryResult); | ||
} | ||
); | ||
}; | ||
const handleXpath1 = ({ | ||
req, wrapFragment, fileContents, forceJSON | ||
}) => { | ||
const nodeArrayToSerializedArray = (arr) => { | ||
return arr.map((node) => { | ||
return node.toString(); | ||
}); | ||
}; | ||
const doc = new xmldom.DOMParser().parseFromString(String(fileContents)); | ||
const xpath1Request = req.headers['query-xpath1'] && | ||
req.headers['query-xpath1'].trim(); | ||
// || '//b[position() > 1 and position() < 4]'; // || '//b/text()', | ||
let queryResult; | ||
queryResult = xpath.select(xpath1Request, doc); | ||
queryResult = forceJSON | ||
? nodeArrayToSerializedArray(queryResult) | ||
: wrapFragment(nodeArrayToSerializedArray(queryResult).join('')); | ||
return queryResult; | ||
}; | ||
const handleCSS3 = ({req, fileContents, forceJSON, wrapFragment}) => { | ||
// Support our own custom :text() and :attr(...) pseudo-classes (todo: do | ||
// as (two-colon) pseudo-elements instead) | ||
const $ = cheerio.load(String(fileContents)); | ||
const [ | ||
, | ||
css3Request, | ||
type = forceJSON ? 'toArray' : 'toString', | ||
css3Attr | ||
] = (req.headers['query-css3'] && req.headers['query-css3'].trim().match( | ||
// eslint-disable-next-line unicorn/no-unsafe-regex -- Todo | ||
/(.*?)(?::(text|attr)\(([^)]*)\))?$/u | ||
)) || []; // Allow explicit "html" (toString) or "toArray" (or "json")? | ||
const nodeArrayToSerializedArray = (items) => { | ||
return [...items.map((i, elem) => { | ||
return $.html(elem); | ||
})]; | ||
}; | ||
let queryResult; | ||
switch (type) { | ||
case 'attr': | ||
// Only gets one attribute anyways, so no need to handle differently for | ||
// JSON (except the stringify below) | ||
queryResult = $(css3Request).attr(css3Attr); | ||
break; | ||
case 'toArray': | ||
// $(css3Request).toString(); handles merging | ||
queryResult = nodeArrayToSerializedArray($(css3Request)); | ||
break; | ||
// Todo: Change 'text' to return array of text nodes in case of JSON? | ||
case 'text': | ||
queryResult = $(css3Request)[type](); | ||
break; | ||
case 'toString': | ||
default: | ||
// Don't merge with next line as intermediate queryResult may be needed | ||
// by `wrapFragment` | ||
queryResult = $(css3Request); | ||
// $(css3Request).toString(); handles merging | ||
queryResult = wrapFragment( | ||
nodeArrayToSerializedArray(queryResult).join('') | ||
); | ||
break; | ||
} | ||
return queryResult; | ||
}; | ||
/** | ||
@@ -34,3 +137,6 @@ * @param {PlainObject} [cfg] | ||
const errorMessage = debug ? err : 'ERROR'; | ||
write(res, 404, responseHeaders, '<div style="color:red;font-weight:bold">' + errorMessage + '</div>'); | ||
write( | ||
res, 404, responseHeaders, | ||
`<div style="color:red;font-weight:bold">${errorMessage}</div>` | ||
); | ||
}; | ||
@@ -62,2 +168,3 @@ | ||
const forceJSON = req.headers['query-format'] === 'json'; | ||
const resultContentType = isXHTML | ||
@@ -74,7 +181,10 @@ ? 'application/xhtml+xml' | ||
: 'text/html'; | ||
const responseHeaders = { | ||
'Content-Type': isJSON || forceJSON ? 'application/json' : resultContentType | ||
'Content-Type': isJSON || forceJSON | ||
? 'application/json' | ||
: resultContentType | ||
}; | ||
const finish = () => { | ||
const finish = (queryResult) => { | ||
fileContents = forceJSON ? JSON.stringify(queryResult) : queryResult; | ||
@@ -90,5 +200,9 @@ | ||
url = url.replace(/(\/|\/\?.*)$/u, '/index.html').replace(/\?.*$/u, '') || 'index.html'; | ||
// url = require('url').parse(url).pathname; // Need to strip off request parameters? | ||
url = url.replace(/(\/|\/\?.*)$/u, '/index.html').replace(/\?.*$/u, '') || | ||
'index.html'; | ||
// Need to strip off request parameters? | ||
// url = require('url').parse(url).pathname; | ||
// console.log('url:'+url); | ||
if (forceJSON) { | ||
@@ -98,6 +212,13 @@ responseHeaders['query-content-type'] = resultContentType; | ||
if (req.headers['query-client-support'] && !req.headers['query-xpath1'] && !req.headers['query-css3'] && !req.headers['query-full-request']) { | ||
if ( | ||
req.headers['query-client-support'] && !req.headers['query-xpath1'] && | ||
!req.headers['query-css3'] && !req.headers['query-full-request'] | ||
) { | ||
responseHeaders['query-server-support'] = 'xpath1, css3, jsonata'; | ||
write(res, 200, responseHeaders, ''); // Don't waste bandwidth if client supports protocol and hasn't asked us to deliver the full document | ||
// Todo: we should allow delivery of a default payload (e.g., full doc if not specified as requesting empty for feature detection+immediate execution if supported) | ||
// Don't waste bandwidth if client supports protocol and hasn't asked | ||
// us to deliver the full document | ||
write(res, 200, responseHeaders, ''); | ||
// Todo: we should allow delivery of a default payload (e.g., full | ||
// doc if not specified as requesting empty for feature detection + | ||
// immediate execution if supported) | ||
} else { | ||
@@ -113,88 +234,58 @@ responseHeaders['query-server-support'] = 'xpath1, css3, jsonata'; | ||
let fileContents; | ||
try { | ||
fileContents = await readFile(join(cwd, path, url)); | ||
} catch (err) { | ||
exitError(res, responseHeaders, err.message); | ||
return; | ||
if (!('jsonData' in req)) { | ||
try { | ||
fileContents = await readFile(join(cwd, path, url)); | ||
} catch (err) { | ||
exitError(res, responseHeaders, err.message); | ||
return; | ||
} | ||
} | ||
const wrapFragment = (frag) => { | ||
if (isHTML) { // || queryResult.length <= 1) { // No need to wrap for HTML or single result sets as no well-formedness requirements | ||
if (isHTML) { | ||
// No need to wrap for HTML or single result sets as no | ||
// well-formedness requirements | ||
// || queryResult.length <= 1) { | ||
return frag; | ||
} | ||
const tag = 'div xmlns="http://www.w3.org/1999/xhtml"'; | ||
return '<' + tag + '>' + frag + '</' + tag.match(/^\w*/u)[0] + '>'; | ||
return `<${tag}>${frag}</${ | ||
tag.match(/^\w*/u)[0] | ||
}>`; | ||
}; | ||
let queryResult; | ||
if ((ignoreQuerySupport || clientJSONPathSupport) && req.headers['query-jsonata'] && !req.headers['query-full-request']) { | ||
const jsonataExpression = jsonata( | ||
req.headers['query-jsonata'].trim() | ||
); | ||
const bindings = req.headers['query-bindings']?.trim(); | ||
jsonataExpression.evaluate( | ||
JSON.parse(fileContents.toString('utf8')), | ||
bindings ? JSON.parse(bindings) : {}, | ||
// eslint-disable-next-line promise/prefer-await-to-callbacks -- jsonata API | ||
(error, result) => { | ||
if (error) { | ||
exitError(res, responseHeaders, error.message); | ||
return; | ||
} | ||
queryResult = JSON.stringify(result); | ||
finish(); | ||
} | ||
); | ||
if ( | ||
(ignoreQuerySupport || clientJSONPathSupport) && | ||
req.headers['query-jsonata'] && !req.headers['query-full-request'] | ||
) { | ||
handleJsonata({ | ||
req, res, responseHeaders, fileContents, exitError, finish | ||
}); | ||
return; | ||
} else if ((ignoreQuerySupport || clientXPath1Support) && req.headers['query-xpath1'] && !req.headers['query-full-request']) { | ||
const nodeArrayToSerializedArray = (arr) => { | ||
return arr.map((node) => { | ||
return node.toString(); | ||
}); | ||
}; | ||
const doc = new xmldom.DOMParser().parseFromString(String(fileContents)); | ||
const xpath1Request = req.headers['query-xpath1'] && req.headers['query-xpath1'].trim(); // || '//b[position() > 1 and position() < 4]'; // || '//b/text()', | ||
queryResult = xpath.select(xpath1Request, doc); | ||
queryResult = forceJSON ? nodeArrayToSerializedArray(queryResult) : wrapFragment(nodeArrayToSerializedArray(queryResult).join('')); | ||
} else if ((ignoreQuerySupport || clientCSS3Support) && req.headers['query-css3'] && !req.headers['query-full-request']) { | ||
// Support our own custom :text() and :attr(...) pseudo-classes (todo: do as (two-colon) pseudo-elements instead) | ||
const $ = cheerio.load(String(fileContents)); | ||
// eslint-disable-next-line unicorn/no-unsafe-regex -- Todo | ||
const css3RequestFull = req.headers['query-css3'] && req.headers['query-css3'].trim().match(/(.*?)(?::(text|attr)\(([^)]*)\))?$/u); // Allow explicit "html" (toString) or "toArray" (or "json")? | ||
const css3Request = css3RequestFull[1]; | ||
const type = css3RequestFull[2] || (forceJSON ? 'toArray' : 'toString'); | ||
const css3Attr = css3RequestFull[3]; | ||
} | ||
const nodeArrayToSerializedArray = (items) => { | ||
/* return arr.map((node) => { | ||
return node; //.html(); | ||
}); */ | ||
return [...items.map((i, elem) => { | ||
return $.html(elem); | ||
})]; | ||
}; | ||
switch (type) { | ||
case 'attr': // Only gets one attribute anyways, so no need to handle differently for JSON (except the stringify below) | ||
queryResult = $(css3Request).attr(css3Attr); | ||
break; | ||
case 'toArray': | ||
queryResult = nodeArrayToSerializedArray($(css3Request)); // $(css3Request).toString(); handles merging | ||
break; | ||
// Todo: Change 'text' to return array of text nodes in case of JSON? | ||
case 'text': | ||
queryResult = $(css3Request)[type](); | ||
break; | ||
case 'toString': | ||
default: | ||
queryResult = $(css3Request); // Don't merge with next line as intermediate queryResult may be needed by wrapFragment | ||
queryResult = wrapFragment(nodeArrayToSerializedArray(queryResult).join('')); // $(css3Request).toString(); handles merging | ||
break; | ||
} | ||
if ( | ||
// XPATH 1 | ||
(ignoreQuerySupport || clientXPath1Support) && | ||
req.headers['query-xpath1'] && !req.headers['query-full-request'] | ||
) { | ||
queryResult = handleXpath1({ | ||
req, wrapFragment, fileContents, forceJSON | ||
}); | ||
} else if ( | ||
// CSS3 | ||
(ignoreQuerySupport || clientCSS3Support) && | ||
req.headers['query-css3'] && !req.headers['query-full-request'] | ||
) { | ||
queryResult = handleCSS3({ | ||
req, fileContents, forceJSON, wrapFragment | ||
}); | ||
} else { | ||
// Text | ||
queryResult = fileContents.toString('utf8'); | ||
} | ||
finish(); | ||
finish(queryResult); | ||
}; | ||
@@ -201,0 +292,0 @@ } |
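As a usage illustration for the JSONata branch above, a client would send the `query-jsonata` and `query-bindings` headers that `handleJsonata` reads. This is a hedged client-side sketch: the URL, port, and data shape are invented, and it assumes the server takes the `ignoreQuerySupport` path seen in the diff, since a bare `query-jsonata` request with a `query-client-support` header would hit the early empty-response branch.

```js
// Requires Node 18+ or a browser for global fetch; URL and data shape assumed.
const res = await fetch('http://localhost:8080/users.json', {
  headers: {
    // JSONata expression, evaluated against the file's JSON (or `req.jsonData`).
    'query-jsonata': 'users[age > $minAge].name',
    // Extra bindings, parsed with JSON.parse and passed to jsonata's evaluate().
    'query-bindings': JSON.stringify({minAge: 30})
  }
});
console.log(await res.text()); // the JSON.stringify'd JSONata result
```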
{ | ||
"name": "httpquery", | ||
"version": "0.6.3", | ||
"version": "0.7.0", | ||
"author": "Brett Zamir", | ||
@@ -84,3 +84,3 @@ "contributors": [], | ||
}, | ||
"readme": "# httpquery\n\n***Note that this is still in alpha stages, so may be unstable or\nnon-functional***\n\n*HTTP query protocol with proof-of-concept implementations obtaining\nsubsets of remote HTML data via XPath or CSS Selectors, essentially\nproviding the likes of a native XML database, but without need for any\nimporting of data (the server will simply read your static HTML/XML files\non demand and deliver a subset of this data as queried by the user\nor application).*\n\n## CLI\n\n![cli.svg](https://raw.githubusercontent.com/brettz9/httpquery/master/cli.svg?sanitize=true)\n\n## Components\n\nHTTPQuery is an *experimental* protocol with the following tools:\n* Proof-of-concept **Firefox addon** (web app to\ncome) to allow remote HTTPQueries without access\nrestrictions\n* **Node.js and PHP server file handler implementations** to allow remote queries to\nbe made to obtain subsets of HTML or XML data via XPath or CSS selector\nsyntax (currently XPath is for XML/XHTML only; CSS Selectors for HTML only?).\nStatic HTML/XML files can be read from the desktop before\nbeing transformed by the client-submitted XPath\nor CSS Selectors query. Files with extension \"html\", \"xhtml\", \"xml\",\nor \"tei\" are currently recognized (files placed within the respective\nserver subfolder (\"Node\" or \"PHP\")). Please see their respective README's.\n\nA PHP demo server is also planned.\n\n## INTRODUCTION (IMPORTANT)\n\nDespite the fact that the ubiquitous files of the web, HTML files, are\nTHEMSELVES databases, there has been a curious lack of ability\nto query these files without first needing to enter their contents into\na database or for a consumer to be forced to download the entire\nfile and then obtain the subset they desire. Even when\ntime has been taken to enter file contents into a database, users\nare often hamstrung by developer decisions, as they are not usually\nempowered to run arbitrary queries.\n\nThis HTTP Query protocol, with reference Node.js and Firefox client\nimplementation are meant to provide users and developers with\na means to overcome these barriers and limitations by letting your\nusers by default query any document that you allow in the manner\nthey wish, and with the default behavior allowing you to keep your\ndata in simple static files, such as arbitrary HTML files or, on\nthe other hand, HTML files shaped in a manner more similar\nin structure to traditional simpler databases (e.g., an HTML\nfile consisting solely of a single table, hierarchical list, etc.).\n\nOther possible uses may include selective spidering.\n\n**Note that as mentioned the protocol syntax as well as tools are still\nvery much experimental and are used at your own risk. Allowing\narbitrary XPath or CSS Selector syntax may present some\nincreased risk of DDOS attacks.**\n\nThe Web IS a database, and it is about time that\nits data becomes opened--for the humblest content creator\nto experienced mashup developers.\n\n## Future goals (general)\n\nWhile the first goal is to allow regular website content creators to\nhave their content available to searches--with HTML/XML being\nthe inevitable document-centric format, JSON support (via\nJSONPath / RQL?) 
is also envisaged.\n\nIt is also hoped, whether through minor markup changes to schema\nattachment, intelligent widgets may become more of a norm in exposing\nsophisticated, offlineable, type-aware and paginated widgets which do\nnot depend on the content creator being themselves a developer for\nthis functionality to be made available to users.\n\nSee the todos for more future goals for the project.\n\n## FAQ\n\n- *Why require headers rather than GET-friendly bookmarkable/shareable request parameters?* - I wanted the protocol to be able to overlay any dynamic as well as static system which might already be using its own request parameters. However, I would like to see a non-HTTP web protocol be created to work with these headers.\n\n- *If I generate my data dynamically (e.g., because I have files too large to be efficiently queried against a static file), how is the protocol still useful?* - The query mechanism and API will still be reusable by local apps (or remote ones such as the Firefox add-on if the server is enabled in a manner like the included Node server), code libraries, etc., even if you do not wish to restrict yourself to static files. For example, even though your API might filter the raw data as it is, an HTTPQuery could be allowed to run on top of that filtered data.\n\n- *Why not use OData?* - While OData has pioneered work in this direction, it is hoped that this simple protocol will gain support and allow\npiecemeal selection of content in a manner reusable by servers and clients with an absolutely bare minimum of effort by content\ncreators (and even implementers).\n\n## Informal, tentative specification for HTTP Query headers\n\n1. The client MAY submit a **query-client-support** header including a whitespace-separated list of supported query mechanisms (currently `xpath1` and `css3`). The HTTPQuery server MUST NOT require this header when other HTTPQuery queries are supplied. (The server MAY utilize the client support header to display minimal content by default since the client user is assumed to be familiar with his own browser's capabilities in utilizing the protocol to query only what he needs. The header **query-full-request** MAY be submitted (instead or in addition) by the client to counter-act this assumption to display minimal content. If the client wishes to make the request for minimal data explicit, it can make a HEAD request.)\n2. The server SHOULD advertise **query-server-support** with a comma-separated list of supported query types (currently `xpath1`, `css3`, and `jsonata`) before specific queries are made and MUST advertise the header when queries are successfully returned (and SHOULD return the header if there is a failure). This information MAY be used by clients to inform users of the query mechanisms available to them for the site.\n3. Requests are made by headers of the form, \"query-request-<QUERY MECHANISM>\". Clients and servers should support **query-request-xpath1** and **query-request-css3** and MAY support other custom mechanisms.\n4. Since queries may return node sets, the question arises as to how to group nodes in the results. In the case of normal HTML payloads, a query-supporting server MUST join together XPath1 and CSS3 query results as a string and without a separator between elements. 
In the case of normal XML payloads, since well-formedness will typically be expected and it is possible that more than one item is returned (i.e., without a single root node), a query-supporting server MUST wrap the resulting XML element(s) within a `div` element in the XHTML namespace (i.e., within `<div xmlns=\"http://www.w3.org/1999/xhtml\"></div>`). The query-supporting server of XPath1 or CSS3 queries MUST also support the ability to recognize an additional client-supplied header, **query-format** set to the value `json` which will deliver the XML or HTML results in the JSON format while also recognizing the header **query-content-type** which will indicate the content-type of the wrapped fragments (i.e., text/html or an XML MIME type) as distinct from the regular **Content-Type** header which for JSON should be `application/json`.\n5. The query-supporting server for CSS3 queries MUST support two extensions described below for obtaining an attribute value or text nodes. In such cases, the format will be a string. The query-supporting server of such queries MUST also support the ability to recognize an additional client-supplied header, **query-format** set to the value `json` so as to deliver the string in JSON format. A **query-content-type** response header MAY be provided if set to `text/plain`. (Headers may be added in the future to distinguish whether JSON delivery should concatenate text node results into a single string or not.)\n\n## CSS Selector modifications\n\nThe CSS Selector syntax has been modified to include the following\npseudo-classes:\n\n* **attr(...)** - Grab the actual attribute content (of the first attribute\nin the node set). This is necessary since attribute selectors are used\nin CSS to target elements rather than attributes.\n* **text()** - Grab the text nodes within the node set\n\n## Comparison with OData\n\nHTTP Query is a much lighter protocol. HTTP Query does hope to eventually support modification as does OData,\nbut in a web-friendly, hierarchical manner such as with https://github.com/kriszyp/put-selector.\n\n(INCOMPLETE)\n\n## Ideas for possible future todos\n\n1. i18n-ize\n1. Add tests (especially ensuring content-type works properly with each mode)\n1. Add an Ajax site-independent web application, including ability to supply arbitrary URLs with cross-site headers or making AsYouWish requests (would be facilitated by https://bugzilla.mozilla.org/show_bug.cgi?id=880908 ; see also https://bugzilla.mozilla.org/show_bug.cgi?id=855936 )\n * Do demos against HTML tables, HTML Microdata, TEI (XML)\n1. Server todos:\n * Make the Node.js implementation wrappable for use with other existing dynamic code.\n * Make the PHP implementation more easily wrappable for use with dynamic code.\n * Contemplate what error/code to return (instead of full text) if user submits query header but not supported\n * Get XPath to work with HTML DOM and get CSS Selectors to work with XML (if it cannot already)?); test on (Linux) environment with jsdom\n * Fix local xpath query \"//a/text()\" or \"//a/@href\" (ORDERED_NODE_SNAPSHOT_TYPE === 7 is ok with arrays but not these apparently)\n * Allow CSS3 :text() nodes to be returned as an array of nodes for JSON (local and remote); allow explicit :html() ?\n * Get server to resolve new HTML Includes (or XInclude's) (and entities?) 
server-side before performing queries\n * Support by cross-domain access by default (since presence of headers already implies at least some flexibility in querying)?\n * Ability to send Relative XPaths (or CSS Sel.), so if file really big, can start at a certain point\n * Store user access in simple text file and use to check along with BrowserID (not related to protocol but another \"powerful-by-default\" feature)\n * Tool to auto-generate XML schema for SQL database table along with a single raw `<table>` export URL (but only enabling downloading\n within limits (see limits below); XPath/CSS Selectors (or paginating query mechanism, etc.) can then be translated back into equivalent SQL.\n1. Add-on todos:\n * Confirm why queries aren't working for some sites and respond accordingly? (e.g., Yahoo and StackOverflow are detecting automatic Ajax header?)\n * Allow JSON format to be displayed as actual application/json content-type (and XML as application/xml)\n * Query input\n * XPath (or CSS Sel.) syntax coloring? (also update regex coloring for CodeMirror!)\n * XPath (or CSS Sel.) with auto-complete based on header-associated schema (including for HTML-treated-as-XML?) or at least general awareness of language/content-type (HTML/XML)\n * Page-specific preferences on whether to send appropriate headers to load HTTPQuery-supporting sites as empty (or full) by default (instead of possible Ajax pagination by the server); selectively advertise support headers (or at least minimize types on which the \"http-on-modify-request\" header is sent)?\n1. Protocol enhancements\n * JSON support (via JSONPath / RQL?)\n * Schema attachment/markup enhancements for intelligent, type-aware, paginated, offlineable widgets:\n * Schema attachment (or markup) used by browser (or server) to make suitable query interface\n * Server indicates header-specified RelaxNG, Schematron for starters, and browser delivers simultaneously with content if possible\n * Schema-awareness by browser to transform current document into queryable doc could work even if doc only partly loaded (or offline)\n * Types:\n 1. Tables\n * Browser displays the requested data inline with already-loaded data, or as requested by user (for file download, separate dialog, etc.?)\n * Allow mashable plugins, e.g., for user providing their own Excel-like automated columns (e.g., if user wanted all tables to allow a given column's data to be translated word-for-word and added as a new column)\n 1. Lists\n * Hierarchical drill-down for browsing and search; also as requested by user (for file download, separate dialog, etc.?)\n 1. Numbered paragraphs\n * Detect paragraph elements within a file and auto-number them (or use an attribute)--e.g., TEI's `<p n=\"\">` for an automatic\n paragraph range selection interface\n 1. Arbitrary but type-aware queries (e.g., use a date selector for finding all dates within a range (of any element or a given element anywhere in the document)\n * Allow both browser-side and server-side overlays (strip at least some markup server-side if handling server-side so client doesn't try to redo); might use headers to detect whether to let user use their own browser-supplied one or some Ajax-based, simulating widget; use Custom Elements?\n * Web-based IDE (WIDE) to integrate with CKEditor/CodeMirror allowing inline querying and modification of data for a given large document without needing to load it all into the IDE view unless desired. 
Schema-driven input could also facilitate more common use of schemas with the query protocol (e.g., the schema for RelaxNG or Schematron could provide auto-complete or XSL on a schema could build a form for input).\n * WYSIWYG table editor to allow adding of types (as well as max, starting point, etc.), so average users can create databases (and schematic info) easily in HTML\n * Some kind of auto-update mechanism for offline storage? (OData ideas?)\n * Limits\n * Client-side size limits - e.g., normally download full load for offline caching (a particular site?) unless over 200 MB/table, etc.\n * Server indicates support limitations (e. g., size limits, max rows/request (page) for tables, lists, etc.) and server ignores if user disregards\n * Allow server (or browser?) to read header or markup provided XPointers to find only specific elements supporting querying/pagination, etc. and with their limits\n * Possible default behaviors: avoid resolving includes?, default row count/size per server, or page-specified suggestions for partial loading and query points)\n * If the HTML is already database-generated, the server could use its own default number of rows/records/size\n * Offline\n * Coordinate full (or even partial) delivery for offline caching and querying (with automatic detection of offline mode, but also option to query offline even if online)\n * Ensure offline storage works with data added after (and before) page load\n * Add-on to allow any page stored for offline use (and cached in user-selected collections); ensure one can also store results when making selective queries\n * (XQuery/XSL/XProc or) jQuery-like syntax for more developer or user-driven complex, server-side reshaping (along with XPath/CSS Selectors) including mashups, though this presents even more challenges re: security\n * Include ability to include & mix other sources declaratively yet query together - e.g., protocol to send current doc to XSL as param to show automated cols\n * Allow data modification, e.g., something friendly like https://github.com/kriszyp/put-selector\n * Create corresponding bookmarkable/shareable protocol (e.g., `query:`) to request and reshape foreign sites with user permission\n * Integrate into privileged AsYouWish HTML pages\n * Add jQuery-like syntax option into add-on dialog with option to save as ayw-HTML (or create HTML content-type based on JS alone without <script></script>) (and then do for my own JML HTML-as-JSON content-type)\n * Other related protocols\n * Implement a related file search protocol to search all files in a folder, etc. (On the desktop, see an analogous proposal for Firefox desktop search, at https://bugzilla.mozilla.org/show_bug.cgi?id=878626 . Implement via Gopher (or METS)-like protocol? Check for <link/> to advertise support and thereby show interface?\n * Consider headers/protocols where you can get just what you want (e.g., Gopher, XMPP Data Forms), but with option for author to surround with arbitrary HTML\n" | ||
"readme": "# httpquery\n\n***Note that this is still in alpha stages, so may be unstable or\nnon-functional; the PHP and add-on, in particular, are not currently\nfunctional.***\n\n*HTTP query protocol with proof-of-concept implementations obtaining\nsubsets of remote HTML data via XPath or CSS Selectors, essentially\nproviding the likes of a native XML database, but without need for any\nimporting of data (the server will simply read your static HTML/XML files\non demand and deliver a subset of this data as queried by the user\nor application).*\n\n## CLI\n\n![cli.svg](https://raw.githubusercontent.com/brettz9/httpquery/master/cli.svg?sanitize=true)\n\n## Components\n\nHTTPQuery is an *experimental* protocol with the following tools:\n- Proof-of-concept **Firefox addon** (web app to\ncome) to allow remote HTTPQueries without access\nrestrictions\n- **Node.js and PHP server file handler implementations** to allow remote\nqueries to be made to obtain subsets of HTML or XML data via XPath or CSS\nselector syntax (currently XPath is for XML/XHTML only; CSS Selectors for\nHTML only?). Static HTML/XML files can be read from the desktop before\nbeing transformed by the client-submitted XPath or CSS Selectors query.\nFiles with extension \"html\", \"xhtml\", \"xml\", or \"tei\" are currently recognized\n(files placed within the respective server subfolder (\"Node\" or \"PHP\")).\nPlease see their respective README's.\n\nA PHP demo server is also planned.\n\n## INTRODUCTION (IMPORTANT)\n\nDespite the fact that the ubiquitous files of the web, HTML files, are\nTHEMSELVES databases, there has been a curious lack of ability\nto query these files without first needing to enter their contents into\na database or for a consumer to be forced to download the entire\nfile and then obtain the subset they desire. Even when\ntime has been taken to enter file contents into a database, users\nare often hamstrung by developer decisions, as they are not usually\nempowered to run arbitrary queries.\n\nThis HTTP Query protocol, with reference Node.js and Firefox client\nimplementation are meant to provide users and developers with\na means to overcome these barriers and limitations by letting your\nusers by default query any document that you allow in the manner\nthey wish, and with the default behavior allowing you to keep your\ndata in simple static files, such as arbitrary HTML files or, on\nthe other hand, HTML files shaped in a manner more similar\nin structure to traditional simpler databases (e.g., an HTML\nfile consisting solely of a single table, hierarchical list, etc.).\n\nOther possible uses may include selective spidering.\n\n**Note that as mentioned the protocol syntax as well as tools are still\nvery much experimental and are used at your own risk. Allowing\narbitrary XPath or CSS Selector syntax may present some\nincreased risk of DDOS attacks.**\n\nThe Web IS a database, and it is about time that\nits data becomes opened--for the humblest content creator\nto experienced mashup developers.\n\n## Future goals (general)\n\nWhile the first goal is to allow regular website content creators to\nhave their content available to searches--with HTML/XML being\nthe inevitable document-centric format, JSON support (via\nJSONPath / RQL?) 
is also envisaged.\n\nIt is also hoped, whether through minor markup changes to schema\nattachment, intelligent widgets may become more of a norm in exposing\nsophisticated, offlineable, type-aware and paginated widgets which do\nnot depend on the content creator being themselves a developer for\nthis functionality to be made available to users.\n\nSee the todos for more future goals for the project.\n\n## FAQ\n\n- *Why require headers rather than GET-friendly bookmarkable/shareable*\n *request parameters?* - I wanted the protocol to be able to overlay any\n dynamic as well as static system which might already be using its own\n request parameters. However, I would like to see a non-HTTP web protocol\n be created to work with these headers.\n\n- *If I generate my data dynamically (e.g., because I have files too large*\n *to be efficiently queried against a static file), how is the protocol*\n *still useful?* - The query mechanism and API will still be reusable by\n local apps (or remote ones such as the Firefox add-on if the server is\n enabled in a manner like the included Node server), code libraries, etc.,\n even if you do not wish to restrict yourself to static files. For example,\n even though your API might filter the raw data as it is, an HTTPQuery could\n be allowed to run on top of that filtered data.\n\n- *Why not use OData?* - While OData has pioneered work in this direction,\n it is hoped that this simple protocol will gain support and allow piecemeal\n selection of content in a manner reusable by servers and clients with an\n absolutely bare minimum of effort by content creators (and even\n implementers).\n\n## Informal, tentative specification for HTTP Query headers\n\n1. The client MAY submit a **query-client-support** header including a\n whitespace-separated list of supported query mechanisms (currently `xpath1`\n and `css3`). The HTTPQuery server MUST NOT require this header when other\n HTTPQuery queries are supplied. (The server MAY utilize the client support\n header to display minimal content by default since the client user is assumed\n to be familiar with his own browser's capabilities in utilizing the protocol\n to query only what he needs. The header **query-full-request** MAY be\n submitted (instead or in addition) by the client to counter-act this\n assumption to display minimal content. If the client wishes to make the\n request for minimal data explicit, it can make a HEAD request.)\n2. The server SHOULD advertise **query-server-support** with a comma-separated\n list of supported query types (currently `xpath1`, `css3`, and `jsonata`)\n before specific queries are made and MUST advertise the header when queries\n are successfully returned (and SHOULD return the header if there is a\n failure). This information MAY be used by clients to inform users of the\n query mechanisms available to them for the site.\n3. Requests are made by headers of the form, \"query-request-<QUERY MECHANISM>\".\n Clients and servers should support **query-request-xpath1** and\n **query-request-css3** and MAY support other custom mechanisms.\n4. Since queries may return node sets, the question arises as to how to group\n nodes in the results. In the case of normal HTML payloads, a query-supporting\n server MUST join together XPath1 and CSS3 query results as a string and\n without a separator between elements. 
In the case of normal XML payloads,\n since well-formedness will typically be expected and it is possible that\n more than one item is returned (i.e., without a single root node), a\n query-supporting server MUST wrap the resulting XML element(s) within a\n `div` element in the XHTML namespace (i.e., within\n `<div xmlns=\"http://www.w3.org/1999/xhtml\"></div>`). The query-supporting\n server of XPath1 or CSS3 queries MUST also support the ability to recognize\n an additional client-supplied header, **query-format** set to the value\n `json` which will deliver the XML or HTML results in the JSON format\n while also recognizing the header **query-content-type** which will\n indicate the content-type of the wrapped fragments (i.e., text/html or an\n XML MIME type) as distinct from the regular **Content-Type** header\n which for JSON should be `application/json`.\n5. The query-supporting server for CSS3 queries MUST support two extensions\n described below for obtaining an attribute value or text nodes. In such\n cases, the format will be a string. The query-supporting server of such\n queries MUST also support the ability to recognize an additional\n client-supplied header, **query-format** set to the value `json` so as to\n deliver the string in JSON format. A **query-content-type** response header\n MAY be provided if set to `text/plain`. (Headers may be added in the future\n to distinguish whether JSON delivery should concatenate text node results\n into a single string or not.)\n\n## CSS Selector modifications\n\nThe CSS Selector syntax has been modified to include the following\npseudo-classes:\n\n- **attr(...)** - Grab the actual attribute content (of the first attribute\nin the node set). This is necessary since attribute selectors are used\nin CSS to target elements rather than attributes.\n- **text()** - Grab the text nodes within the node set\n\n## Comparison with OData\n\nHTTP Query is a much lighter protocol. HTTP Query does hope to eventually\nsupport modification as does OData, but in a web-friendly, hierarchical manner\nsuch as with <https://github.com/kriszyp/put-selector>.\n\n(INCOMPLETE)\n\n## Ideas for possible future todos\n\n1. Support **[JSONiq](https://www.jsoniq.org/)** (XQuery-like power for JSON)\n1. Support **XQuery** (via\n [https://github.com/FontoXML/fontoxpath](fontoxpath)?)\n1. **Restore add-on** (as webextension) so can make queries and joins even to\n non-httpquery sites!\n1. i18n-ize\n1. Add tests (especially ensuring content-type works properly with each mode)\n1. Add an Ajax site-independent web application, including ability to supply\n arbitrary URLs with cross-site headers or making AsYouWish requests (would\n be facilitated by <https://bugzilla.mozilla.org/show_bug.cgi?id=880908>; see\n also <https://bugzilla.mozilla.org/show_bug.cgi?id=855936>)\n - Do demos against HTML tables, HTML Microdata, TEI (XML)\n1. 
Server todos:\n - Make the Node.js implementation wrappable for use with other existing\n dynamic code.\n - Make the PHP implementation more easily wrappable for use with dynamic\n code.\n - Contemplate what error/code to return (instead of full text) if user\n submits query header but not supported\n - Get XPath to work with HTML DOM and get CSS Selectors to work with XML\n (if it cannot already)?); test on (Linux) environment with jsdom\n - Fix local xpath query \"//a/text()\" or \"//a/@href\"\n (ORDERED_NODE_SNAPSHOT_TYPE === 7 is ok with arrays but not these\n apparently)\n - Allow CSS3 :text() nodes to be returned as an array of nodes for JSON\n (local and remote); allow explicit :html() ?\n - Get server to resolve new HTML Includes (or XInclude's) (and entities?)\n server-side before performing queries\n - Support by cross-domain access by default (since presence of headers\n already implies at least some flexibility in querying)?\n - Ability to send Relative XPaths (or CSS Sel.), so if file really big,\n can start at a certain point\n - Store user access in simple text file and use to check along with\n BrowserID (not related to protocol but another \"powerful-by-default\"\n feature)\n - Tool to auto-generate XML schema for SQL database table along with a\n single raw `<table>` export URL (but only enabling downloading within\n limits (see limits below); XPath/CSS Selectors (or paginating query\n mechanism, etc.) can then be translated back into equivalent SQL.\n1. Add-on todos:\n - Confirm why queries aren't working for some sites and respond\n accordingly? (e.g., Yahoo and StackOverflow are detecting automatic\n Ajax header?)\n - Allow JSON format to be displayed as actual application/json\n content-type (and XML as application/xml)\n - Query input\n - XPath (or CSS Sel.) syntax coloring? (also update regex coloring for\n CodeMirror!)\n - XPath (or CSS Sel.) with auto-complete based on header-associated\n schema (including for HTML-treated-as-XML?) or at least general\n awareness of language/content-type (HTML/XML)\n - Page-specific preferences on whether to send appropriate headers to\n load HTTPQuery-supporting sites as empty (or full) by default (instead\n of possible Ajax pagination by the server); selectively advertise\n support headers (or at least minimize types on which the\n \"http-on-modify-request\" header is sent)?\n1. Protocol enhancements\n - JSON support (via JSONPath / RQL?)\n - Schema attachment/markup enhancements for intelligent, type-aware,\n paginated, offlineable widgets:\n - Schema attachment (or markup) used by browser (or server) to make\n suitable query interface\n - Server indicates header-specified RelaxNG, Schematron for\n starters, and browser delivers simultaneously with content\n if possible\n - Schema-awareness by browser to transform current document into\n queryable doc could work even if doc only partly loaded (or\n offline)\n - Types:\n 1. Tables\n - Browser displays the requested data inline with\n already-loaded data, or as requested by user (for file\n download, separate dialog, etc.?)\n - Allow mashable plugins, e.g., for user providing their\n own Excel-like automated columns (e.g., if user wanted\n all tables to allow a given column's data to be\n translated word-for-word and added as a new column)\n 1. Lists\n - Hierarchical drill-down for browsing and search; also\n as requested by user (for file download, separate\n dialog, etc.?)\n 1. 
Numbered paragraphs\n - Detect paragraph elements within a file and auto-number\n them (or use an attribute)--e.g., TEI's `<p n=\"\">` for\n an automatic\n paragraph range selection interface\n 1. Arbitrary but type-aware queries (e.g., use a date selector\n for finding all dates within a range (of any element or\n a given element anywhere in the document)\n - Allow both browser-side and server-side overlays (strip at\n least some markup server-side if handling server-side so client\n doesn't try to redo); might use headers to detect whether to\n let user use their own browser-supplied one or some Ajax-based,\n simulating widget; use Custom Elements?\n - Web-based IDE (WIDE) to integrate with CKEditor/CodeMirror\n allowing inline querying and modification of data for a given\n large document without needing to load it all into the IDE view\n unless desired. Schema-driven input could also facilitate more\n common use of schemas with the query protocol (e.g., the schema\n for RelaxNG or Schematron could provide auto-complete or XSL on\n a schema could build a form for input).\n - WYSIWYG table editor to allow adding of types (as well as\n max, starting point, etc.), so average users can create\n databases (and schematic info) easily in HTML\n - Some kind of auto-update mechanism for offline storage? (OData\n ideas?)\n - Limits\n - Client-side size limits - e.g., normally download full load\n for offline caching (a particular site?) unless over\n 200 MB/table, etc.\n - Server indicates support limitations (e. g., size limits,\n max rows/request (page) for tables, lists, etc.) and server\n ignores if user disregards\n - Allow server (or browser?) to read header or markup\n provided XPointers to find only specific elements\n supporting querying/pagination, etc. and with their\n limits\n - Possible default behaviors: avoid resolving includes?,\n default row count/size per server, or page-specified\n suggestions for partial loading and query points)\n - If the HTML is already database-generated, the server\n could use its own default number of rows/records/size\n - Offline\n - Coordinate full (or even partial) delivery for offline\n caching and querying (with automatic detection of offline\n mode, but also option to query offline even if online)\n - Ensure offline storage works with data added after (and\n before) page load\n - Add-on to allow any page stored for offline use (and cached\n in user-selected collections); ensure one can also store\n results when making selective queries\n - (XQuery/XSL/XProc or) jQuery-like syntax for more developer or\n user-driven complex, server-side reshaping (along with XPath/CSS\n Selectors) including mashups, though this presents even more challenges\n re: security\n - Include ability to include & mix other sources declaratively yet\n query together - e.g., protocol to send current doc to XSL as\n param to show automated cols\n - Allow data modification, e.g., something friendly like\n <https://github.com/kriszyp/put-selector>\n - Create corresponding bookmarkable/shareable protocol (e.g., `query:`) to\n request and reshape foreign sites with user permission\n - Integrate into privileged AsYouWish HTML pages\n - Add jQuery-like syntax option into add-on dialog with option to save\n as ayw-HTML (or create HTML content-type based on JS alone without\n `<script></script>`) (and then do for my own JML HTML-as-JSON\n content-type)\n - Other related protocols\n - Implement a related file search protocol to search all files in a\n folder, etc. 
(On the desktop, see an analogous proposal for Firefox\n desktop search, at\n <https://bugzilla.mozilla.org/show_bug.cgi?id=878626>. Implement via\n Gopher (or METS)-like protocol? Check for <link/> to advertise\n support and thereby show interface?\n - Consider headers/protocols where you can get just what you want\n (e.g., Gopher, XMPP Data Forms), but with option for author to\n surround with arbitrary HTML\n" | ||
} |

README.md
# httpquery | ||
***Note that this is still in alpha stages, so may be unstable or | ||
non-functional*** | ||
non-functional; the PHP and add-on, in particular, are not currently | ||
functional.*** | ||
@@ -20,13 +21,13 @@ *HTTP query protocol with proof-of-concept implementations obtaining | ||
HTTPQuery is an *experimental* protocol with the following tools: | ||
* Proof-of-concept **Firefox addon** (web app to | ||
- Proof-of-concept **Firefox addon** (web app to | ||
come) to allow remote HTTPQueries without access | ||
restrictions | ||
* **Node.js and PHP server file handler implementations** to allow remote queries to | ||
be made to obtain subsets of HTML or XML data via XPath or CSS selector | ||
syntax (currently XPath is for XML/XHTML only; CSS Selectors for HTML only?). | ||
Static HTML/XML files can be read from the desktop before | ||
being transformed by the client-submitted XPath | ||
or CSS Selectors query. Files with extension "html", "xhtml", "xml", | ||
or "tei" are currently recognized (files placed within the respective | ||
server subfolder ("Node" or "PHP")). Please see their respective README's. | ||
- **Node.js and PHP server file handler implementations** to allow remote | ||
queries to be made to obtain subsets of HTML or XML data via XPath or CSS | ||
selector syntax (currently XPath is for XML/XHTML only; CSS Selectors for | ||
HTML only?). Static HTML/XML files can be read from the desktop before | ||
being transformed by the client-submitted XPath or CSS Selectors query. | ||
Files with extension "html", "xhtml", "xml", or "tei" are currently recognized | ||
(files placed within the respective server subfolder ("Node" or "PHP")). | ||
Please see their respective README's. | ||
@@ -84,17 +85,69 @@ A PHP demo server is also planned. | ||
- *Why require headers rather than GET-friendly bookmarkable/shareable request parameters?* - I wanted the protocol to be able to overlay any dynamic as well as static system which might already be using its own request parameters. However, I would like to see a non-HTTP web protocol be created to work with these headers. | ||
- *Why require headers rather than GET-friendly bookmarkable/shareable* | ||
*request parameters?* - I wanted the protocol to be able to overlay any | ||
dynamic as well as static system which might already be using its own | ||
request parameters. However, I would like to see a non-HTTP web protocol | ||
be created to work with these headers. | ||
- *If I generate my data dynamically (e.g., because I have files too large to be efficiently queried against a static file), how is the protocol still useful?* - The query mechanism and API will still be reusable by local apps (or remote ones such as the Firefox add-on if the server is enabled in a manner like the included Node server), code libraries, etc., even if you do not wish to restrict yourself to static files. For example, even though your API might filter the raw data as it is, an HTTPQuery could be allowed to run on top of that filtered data. | ||
- *If I generate my data dynamically (e.g., because I have files too large* | ||
*to be efficiently queried against a static file), how is the protocol* | ||
*still useful?* - The query mechanism and API will still be reusable by | ||
local apps (or remote ones such as the Firefox add-on if the server is | ||
enabled in a manner like the included Node server), code libraries, etc., | ||
even if you do not wish to restrict yourself to static files. For example, | ||
even though your API might filter the raw data as it is, an HTTPQuery could | ||
be allowed to run on top of that filtered data. | ||
- *Why not use OData?* - While OData has pioneered work in this direction, it is hoped that this simple protocol will gain support and allow | ||
piecemeal selection of content in a manner reusable by servers and clients with an absolutely bare minimum of effort by content | ||
creators (and even implementers). | ||
- *Why not use OData?* - While OData has pioneered work in this direction, | ||
it is hoped that this simple protocol will gain support and allow piecemeal | ||
selection of content in a manner reusable by servers and clients with an | ||
absolutely bare minimum of effort by content creators (and even | ||
implementers). | ||
## Informal, tentative specification for HTTP Query headers | ||
1. The client MAY submit a **query-client-support** header including a whitespace-separated list of supported query mechanisms (currently `xpath1` and `css3`). The HTTPQuery server MUST NOT require this header when other HTTPQuery queries are supplied. (The server MAY utilize the client support header to display minimal content by default since the client user is assumed to be familiar with his own browser's capabilities in utilizing the protocol to query only what he needs. The header **query-full-request** MAY be submitted (instead or in addition) by the client to counter-act this assumption to display minimal content. If the client wishes to make the request for minimal data explicit, it can make a HEAD request.) | ||
2. The server SHOULD advertise **query-server-support** with a comma-separated list of supported query types (currently `xpath1`, `css3`, and `jsonata`) before specific queries are made and MUST advertise the header when queries are successfully returned (and SHOULD return the header if there is a failure). This information MAY be used by clients to inform users of the query mechanisms available to them for the site. | ||
3. Requests are made by headers of the form, "query-request-<QUERY MECHANISM>". Clients and servers should support **query-request-xpath1** and **query-request-css3** and MAY support other custom mechanisms. | ||
4. Since queries may return node sets, the question arises as to how to group nodes in the results. In the case of normal HTML payloads, a query-supporting server MUST join together XPath1 and CSS3 query results as a string and without a separator between elements. In the case of normal XML payloads, since well-formedness will typically be expected and it is possible that more than one item is returned (i.e., without a single root node), a query-supporting server MUST wrap the resulting XML element(s) within a `div` element in the XHTML namespace (i.e., within `<div xmlns="http://www.w3.org/1999/xhtml"></div>`). The query-supporting server of XPath1 or CSS3 queries MUST also support the ability to recognize an additional client-supplied header, **query-format** set to the value `json` which will deliver the XML or HTML results in the JSON format while also recognizing the header **query-content-type** which will indicate the content-type of the wrapped fragments (i.e., text/html or an XML MIME type) as distinct from the regular **Content-Type** header which for JSON should be `application/json`. | ||
5. The query-supporting server for CSS3 queries MUST support two extensions described below for obtaining an attribute value or text nodes. In such cases, the format will be a string. The query-supporting server of such queries MUST also support the ability to recognize an additional client-supplied header, **query-format** set to the value `json` so as to deliver the string in JSON format. A **query-content-type** response header MAY be provided if set to `text/plain`. (Headers may be added in the future to distinguish whether JSON delivery should concatenate text node results into a single string or not.) | ||
1. The client MAY submit a **query-client-support** header including a | ||
whitespace-separated list of supported query mechanisms (currently `xpath1` | ||
and `css3`). The HTTPQuery server MUST NOT require this header when other | ||
HTTPQuery queries are supplied. (The server MAY utilize the client support | ||
header to display minimal content by default since the client user is assumed | ||
to be familiar with his own browser's capabilities in utilizing the protocol | ||
to query only what he needs. The header **query-full-request** MAY be | ||
submitted (instead or in addition) by the client to counter-act this | ||
assumption to display minimal content. If the client wishes to make the | ||
request for minimal data explicit, it can make a HEAD request.) | ||
2. The server SHOULD advertise **query-server-support** with a comma-separated | ||
list of supported query types (currently `xpath1`, `css3`, and `jsonata`) | ||
before specific queries are made and MUST advertise the header when queries | ||
are successfully returned (and SHOULD return the header if there is a | ||
failure). This information MAY be used by clients to inform users of the | ||
query mechanisms available to them for the site. | ||
3. Requests are made by headers of the form, "query-request-<QUERY MECHANISM>". | ||
Clients and servers should support **query-request-xpath1** and | ||
**query-request-css3** and MAY support other custom mechanisms. | ||
4. Since queries may return node sets, the question arises as to how to group | ||
nodes in the results. In the case of normal HTML payloads, a query-supporting | ||
server MUST join together XPath1 and CSS3 query results as a string and | ||
without a separator between elements. In the case of normal XML payloads, | ||
since well-formedness will typically be expected and it is possible that | ||
more than one item is returned (i.e., without a single root node), a | ||
query-supporting server MUST wrap the resulting XML element(s) within a | ||
`div` element in the XHTML namespace (i.e., within | ||
`<div xmlns="http://www.w3.org/1999/xhtml"></div>`). The query-supporting | ||
server of XPath1 or CSS3 queries MUST also support the ability to recognize | ||
an additional client-supplied header, **query-format** set to the value | ||
`json` which will deliver the XML or HTML results in the JSON format | ||
while also recognizing the header **query-content-type** which will | ||
indicate the content-type of the wrapped fragments (i.e., text/html or an | ||
XML MIME type) as distinct from the regular **Content-Type** header | ||
which for JSON should be `application/json`. | ||
5. The query-supporting server for CSS3 queries MUST support two extensions | ||
described below for obtaining an attribute value or text nodes. In such | ||
cases, the format will be a string. The query-supporting server of such | ||
queries MUST also support the ability to recognize an additional | ||
client-supplied header, **query-format** set to the value `json` so as to | ||
deliver the string in JSON format. A **query-content-type** response header | ||
MAY be provided if set to `text/plain`. (Headers may be added in the future | ||
to distinguish whether JSON delivery should concatenate text node results | ||
into a single string or not.) | ||
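A concrete exchange may make the header flow clearer. The sketch below is illustrative only: the URL and document are invented, and it uses the header names the Node implementation in this diff actually reads (`query-xpath1` rather than the `query-request-xpath1` form given in point 3):

```js
// Node 18+/browser fetch; the URL and XPath target are assumed for illustration.
const res = await fetch('http://example.org/catalog.xml', {
  headers: {
    'query-client-support': 'xpath1 css3',     // point 1: whitespace-separated list
    'query-xpath1': '//book[position() < 3]',  // the query itself
    'query-format': 'json'                     // point 4: deliver results as JSON
  }
});
console.log(res.headers.get('query-server-support')); // e.g. "xpath1, css3, jsonata"
console.log(res.headers.get('query-content-type'));   // content-type of the wrapped fragments
console.log(await res.json());                        // e.g. ["<book>...</book>", "<book>...</book>"]
```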
@@ -106,11 +159,12 @@ ## CSS Selector modifications | ||
* **attr(...)** - Grab the actual attribute content (of the first attribute | ||
- **attr(...)** - Grab the actual attribute content (of the first attribute | ||
in the node set). This is necessary since attribute selectors are used | ||
in CSS to target elements rather than attributes. | ||
* **text()** - Grab the text nodes within the node set | ||
- **text()** - Grab the text nodes within the node set | ||
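For instance, using the `query-css3` header read by the Node implementation in this diff (a hedged sketch; the URL and markup are assumed):

```js
// Grab the `href` value of the first matching link (string result).
await fetch('http://example.org/index.html', {
  headers: {
    'query-client-support': 'css3',
    'query-css3': 'a.download:attr(href)'
  }
});

// Grab the text nodes of the matched elements instead of their markup.
await fetch('http://example.org/index.html', {
  headers: {
    'query-client-support': 'css3',
    'query-css3': 'h1.title:text()'
  }
});
```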
## Comparison with OData | ||
HTTP Query is a much lighter protocol. HTTP Query does hope to eventually support modification as does OData, | ||
but in a web-friendly, hierarchical manner such as with https://github.com/kriszyp/put-selector. | ||
HTTP Query is a much lighter protocol. HTTP Query does hope to eventually | ||
support modification as does OData, but in a web-friendly, hierarchical manner | ||
such as with <https://github.com/kriszyp/put-selector>. | ||
@@ -121,64 +175,158 @@ (INCOMPLETE) | ||
1. Support **[JSONiq](https://www.jsoniq.org/)** (XQuery-like power for JSON) | ||
1. Support **XQuery** (via | ||
[fontoxpath](https://github.com/FontoXML/fontoxpath)?) | ||
1. **Restore add-on** (as a WebExtension) so one can make queries and joins | ||
even against non-httpquery sites! | ||
1. i18n-ize | ||
1. Add tests (especially ensuring content-type works properly with each mode) | ||
1. Add an Ajax site-independent web application, including ability to supply arbitrary URLs with cross-site headers or making AsYouWish requests (would be facilitated by https://bugzilla.mozilla.org/show_bug.cgi?id=880908 ; see also https://bugzilla.mozilla.org/show_bug.cgi?id=855936 ) | ||
* Do demos against HTML tables, HTML Microdata, TEI (XML) | ||
1. Add an Ajax site-independent web application, including the ability to | ||
supply arbitrary URLs with cross-site headers or to make AsYouWish requests | ||
(would be facilitated by <https://bugzilla.mozilla.org/show_bug.cgi?id=880908>; | ||
see also <https://bugzilla.mozilla.org/show_bug.cgi?id=855936>) | ||
- Do demos against HTML tables, HTML Microdata, TEI (XML) | ||
1. Server todos: | ||
* Make the Node.js implementation wrappable for use with other existing dynamic code. | ||
* Make the PHP implementation more easily wrappable for use with dynamic code. | ||
* Contemplate what error/code to return (instead of full text) if user submits query header but not supported | ||
* Get XPath to work with HTML DOM and get CSS Selectors to work with XML (if it cannot already)?); test on (Linux) environment with jsdom | ||
* Fix local xpath query "//a/text()" or "//a/@href" (ORDERED_NODE_SNAPSHOT_TYPE === 7 is ok with arrays but not these apparently) | ||
* Allow CSS3 :text() nodes to be returned as an array of nodes for JSON (local and remote); allow explicit :html() ? | ||
* Get server to resolve new HTML Includes (or XInclude's) (and entities?) server-side before performing queries | ||
* Support by cross-domain access by default (since presence of headers already implies at least some flexibility in querying)? | ||
* Ability to send Relative XPaths (or CSS Sel.), so if file really big, can start at a certain point | ||
* Store user access in simple text file and use to check along with BrowserID (not related to protocol but another "powerful-by-default" feature) | ||
* Tool to auto-generate XML schema for SQL database table along with a single raw `<table>` export URL (but only enabling downloading | ||
within limits (see limits below); XPath/CSS Selectors (or paginating query mechanism, etc.) can then be translated back into equivalent SQL. | ||
- Make the Node.js implementation wrappable for use with other existing | ||
dynamic code (see the middleware sketch after this list). | ||
- Make the PHP implementation more easily wrappable for use with dynamic | ||
code. | ||
- Contemplate what error/code to return (instead of the full text) if the | ||
user submits a query header that is not supported | ||
- Get XPath to work with HTML DOM and get CSS Selectors to work with XML | ||
(if it cannot already?); test on a (Linux) environment with jsdom | ||
- Fix local xpath query "//a/text()" or "//a/@href" | ||
(ORDERED_NODE_SNAPSHOT_TYPE === 7 is ok with arrays but not these | ||
apparently) | ||
- Allow CSS3 :text() nodes to be returned as an array of nodes for JSON | ||
(local and remote); allow explicit :html() ? | ||
- Get server to resolve new HTML Includes (or XIncludes) (and entities?) | ||
server-side before performing queries | ||
- Support cross-domain access by default (since presence of headers | ||
already implies at least some flexibility in querying)? | ||
- Ability to send Relative XPaths (or CSS Sel.), so if file really big, | ||
can start at a certain point | ||
- Store user access in a simple text file and use it to check along with | ||
BrowserID (not related to protocol but another "powerful-by-default" | ||
feature) | ||
- Tool to auto-generate an XML schema for a SQL database table along with a | ||
single raw `<table>` export URL (but only enabling downloading within | ||
limits; see limits below). XPath/CSS Selectors (or a paginating query | ||
mechanism, etc.) can then be translated back into equivalent SQL. | ||
1. Add-on todos: | ||
* Confirm why queries aren't working for some sites and respond accordingly? (e.g., Yahoo and StackOverflow are detecting automatic Ajax header?) | ||
* Allow JSON format to be displayed as actual application/json content-type (and XML as application/xml) | ||
* Query input | ||
* XPath (or CSS Sel.) syntax coloring? (also update regex coloring for CodeMirror!) | ||
* XPath (or CSS Sel.) with auto-complete based on header-associated schema (including for HTML-treated-as-XML?) or at least general awareness of language/content-type (HTML/XML) | ||
* Page-specific preferences on whether to send appropriate headers to load HTTPQuery-supporting sites as empty (or full) by default (instead of possible Ajax pagination by the server); selectively advertise support headers (or at least minimize types on which the "http-on-modify-request" header is sent)? | ||
- Confirm why queries aren't working for some sites and respond | ||
accordingly? (e.g., Yahoo and StackOverflow are detecting automatic | ||
Ajax header?) | ||
- Allow JSON format to be displayed as actual application/json | ||
content-type (and XML as application/xml) | ||
- Query input | ||
- XPath (or CSS Sel.) syntax coloring? (also update regex coloring for | ||
CodeMirror!) | ||
- XPath (or CSS Sel.) with auto-complete based on header-associated | ||
schema (including for HTML-treated-as-XML?) or at least general | ||
awareness of language/content-type (HTML/XML) | ||
- Page-specific preferences on whether to send appropriate headers to | ||
load HTTPQuery-supporting sites as empty (or full) by default (instead | ||
of possible Ajax pagination by the server); selectively advertise | ||
support headers (or at least minimize types on which the | ||
"http-on-modify-request" header is sent)? | ||
1. Protocol enhancements | ||
* JSON support (via JSONPath / RQL?) | ||
* Schema attachment/markup enhancements for intelligent, type-aware, paginated, offlineable widgets: | ||
* Schema attachment (or markup) used by browser (or server) to make suitable query interface | ||
* Server indicates header-specified RelaxNG, Schematron for starters, and browser delivers simultaneously with content if possible | ||
* Schema-awareness by browser to transform current document into queryable doc could work even if doc only partly loaded (or offline) | ||
* Types: | ||
- JSON support (via JSONPath / RQL?) | ||
- Schema attachment/markup enhancements for intelligent, type-aware, | ||
paginated, offlineable widgets: | ||
- Schema attachment (or markup) used by browser (or server) to make | ||
suitable query interface | ||
- Server indicates header-specified RelaxNG, Schematron for | ||
starters, and browser delivers simultaneously with content | ||
if possible | ||
- Schema-awareness by browser to transform current document into | ||
queryable doc could work even if doc only partly loaded (or | ||
offline) | ||
- Types: | ||
1. Tables | ||
* Browser displays the requested data inline with already-loaded data, or as requested by user (for file download, separate dialog, etc.?) | ||
* Allow mashable plugins, e.g., for user providing their own Excel-like automated columns (e.g., if user wanted all tables to allow a given column's data to be translated word-for-word and added as a new column) | ||
- Browser displays the requested data inline with | ||
already-loaded data, or as requested by user (for file | ||
download, separate dialog, etc.?) | ||
- Allow mashable plugins, e.g., for user providing their | ||
own Excel-like automated columns (e.g., if user wanted | ||
all tables to allow a given column's data to be | ||
translated word-for-word and added as a new column) | ||
1. Lists | ||
* Hierarchical drill-down for browsing and search; also as requested by user (for file download, separate dialog, etc.?) | ||
- Hierarchical drill-down for browsing and search; also | ||
as requested by user (for file download, separate | ||
dialog, etc.?) | ||
1. Numbered paragraphs | ||
* Detect paragraph elements within a file and auto-number them (or use an attribute)--e.g., TEI's `<p n="">` for an automatic | ||
- Detect paragraph elements within a file and auto-number | ||
them (or use an attribute)--e.g., TEI's `<p n="">` for | ||
an automatic | ||
paragraph range selection interface | ||
1. Arbitrary but type-aware queries (e.g., use a date selector for finding all dates within a range (of any element or a given element anywhere in the document) | ||
* Allow both browser-side and server-side overlays (strip at least some markup server-side if handling server-side so client doesn't try to redo); might use headers to detect whether to let user use their own browser-supplied one or some Ajax-based, simulating widget; use Custom Elements? | ||
* Web-based IDE (WIDE) to integrate with CKEditor/CodeMirror allowing inline querying and modification of data for a given large document without needing to load it all into the IDE view unless desired. Schema-driven input could also facilitate more common use of schemas with the query protocol (e.g., the schema for RelaxNG or Schematron could provide auto-complete or XSL on a schema could build a form for input). | ||
* WYSIWYG table editor to allow adding of types (as well as max, starting point, etc.), so average users can create databases (and schematic info) easily in HTML | ||
* Some kind of auto-update mechanism for offline storage? (OData ideas?) | ||
* Limits | ||
* Client-side size limits - e.g., normally download full load for offline caching (a particular site?) unless over 200 MB/table, etc. | ||
* Server indicates support limitations (e. g., size limits, max rows/request (page) for tables, lists, etc.) and server ignores if user disregards | ||
* Allow server (or browser?) to read header or markup provided XPointers to find only specific elements supporting querying/pagination, etc. and with their limits | ||
* Possible default behaviors: avoid resolving includes?, default row count/size per server, or page-specified suggestions for partial loading and query points) | ||
* If the HTML is already database-generated, the server could use its own default number of rows/records/size | ||
* Offline | ||
* Coordinate full (or even partial) delivery for offline caching and querying (with automatic detection of offline mode, but also option to query offline even if online) | ||
* Ensure offline storage works with data added after (and before) page load | ||
* Add-on to allow any page stored for offline use (and cached in user-selected collections); ensure one can also store results when making selective queries | ||
* (XQuery/XSL/XProc or) jQuery-like syntax for more developer or user-driven complex, server-side reshaping (along with XPath/CSS Selectors) including mashups, though this presents even more challenges re: security | ||
* Include ability to include & mix other sources declaratively yet query together - e.g., protocol to send current doc to XSL as param to show automated cols | ||
* Allow data modification, e.g., something friendly like https://github.com/kriszyp/put-selector | ||
* Create corresponding bookmarkable/shareable protocol (e.g., `query:`) to request and reshape foreign sites with user permission | ||
* Integrate into privileged AsYouWish HTML pages | ||
* Add jQuery-like syntax option into add-on dialog with option to save as ayw-HTML (or create HTML content-type based on JS alone without <script></script>) (and then do for my own JML HTML-as-JSON content-type) | ||
* Other related protocols | ||
* Implement a related file search protocol to search all files in a folder, etc. (On the desktop, see an analogous proposal for Firefox desktop search, at https://bugzilla.mozilla.org/show_bug.cgi?id=878626 . Implement via Gopher (or METS)-like protocol? Check for <link/> to advertise support and thereby show interface? | ||
* Consider headers/protocols where you can get just what you want (e.g., Gopher, XMPP Data Forms), but with option for author to surround with arbitrary HTML | ||
1. Arbitrary but type-aware queries (e.g., use a date selector | ||
for finding all dates within a range (of any element or | ||
a given element anywhere in the document)) | ||
- Allow both browser-side and server-side overlays (strip at | ||
least some markup server-side if handling server-side so client | ||
doesn't try to redo); might use headers to detect whether to | ||
let user use their own browser-supplied one or some Ajax-based, | ||
simulating widget; use Custom Elements? | ||
- Web-based IDE (WIDE) to integrate with CKEditor/CodeMirror | ||
allowing inline querying and modification of data for a given | ||
large document without needing to load it all into the IDE view | ||
unless desired. Schema-driven input could also facilitate more | ||
common use of schemas with the query protocol (e.g., the schema | ||
for RelaxNG or Schematron could provide auto-complete or XSL on | ||
a schema could build a form for input). | ||
- WYSIWYG table editor to allow adding of types (as well as | ||
max, starting point, etc.), so average users can create | ||
databases (and schematic info) easily in HTML | ||
- Some kind of auto-update mechanism for offline storage? (OData | ||
ideas?) | ||
- Limits | ||
- Client-side size limits - e.g., normally download full load | ||
for offline caching (a particular site?) unless over | ||
200 MB/table, etc. | ||
- Server indicates support limitations (e.g., size limits, | ||
max rows/request (page) for tables, lists, etc.) and the server | ||
ignores requests that disregard them | ||
- Allow server (or browser?) to read header- or markup-provided | ||
XPointers to find only specific elements supporting | ||
querying/pagination, etc., along with their limits | ||
- Possible default behaviors: avoid resolving includes?, | ||
default row count/size per server, or page-specified | ||
suggestions for partial loading and query points | ||
- If the HTML is already database-generated, the server | ||
could use its own default number of rows/records/size | ||
- Offline | ||
- Coordinate full (or even partial) delivery for offline | ||
caching and querying (with automatic detection of offline | ||
mode, but also option to query offline even if online) | ||
- Ensure offline storage works with data added after (and | ||
before) page load | ||
- Add-on to allow any page stored for offline use (and cached | ||
in user-selected collections); ensure one can also store | ||
results when making selective queries | ||
- (XQuery/XSL/XProc or) jQuery-like syntax for more developer or | ||
user-driven complex, server-side reshaping (along with XPath/CSS | ||
Selectors) including mashups, though this presents even more challenges | ||
re: security | ||
- Include ability to include & mix other sources declaratively yet | ||
query together - e.g., protocol to send current doc to XSL as | ||
param to show automated cols | ||
- Allow data modification, e.g., something friendly like | ||
<https://github.com/kriszyp/put-selector> | ||
- Create corresponding bookmarkable/shareable protocol (e.g., `query:`) to | ||
request and reshape foreign sites with user permission | ||
- Integrate into privileged AsYouWish HTML pages | ||
- Add jQuery-like syntax option into add-on dialog with option to save | ||
as ayw-HTML (or create HTML content-type based on JS alone without | ||
`<script></script>`) (and then do for my own JML HTML-as-JSON | ||
content-type) | ||
- Other related protocols | ||
- Implement a related file search protocol to search all files in a | ||
folder, etc. (On the desktop, see an analogous proposal for Firefox | ||
desktop search, at | ||
<https://bugzilla.mozilla.org/show_bug.cgi?id=878626>.) Implement via a | ||
Gopher (or METS)-like protocol? Check for `<link/>` to advertise | ||
support and thereby show the interface? | ||
- Consider headers/protocols where you can get just what you want | ||
(e.g., Gopher, XMPP Data Forms), but with option for author to | ||
surround with arbitrary HTML |
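For the server todo above about making the Node.js implementation wrappable, here is a purely illustrative sketch of how such an integration might look, assuming middleware can supply pre-parsed JSON on `req.jsonData` for query evaluation. The `httpquery` import and its signature are hypothetical; the package's actual export may differ.

```js
import http from 'node:http';
import {httpquery} from 'httpquery'; // hypothetical export name/signature

http.createServer((req, res) => {
  // "Middleware" step: attach pre-parsed JSON for this request, e.g. data
  // pulled from a database or an earlier body-parsing step
  req.jsonData = {users: [{name: 'Ada'}, {name: 'Grace'}]};

  // Hand off to the (hypothetical) query-aware handler, which could then
  // evaluate a JSONata request header against req.jsonData rather than a file
  httpquery(req, res);
}).listen(8080);
```

The same pattern would let an existing Express/Connect-style stack mount the handler after its own body-parsing or database middleware.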
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package.
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package.