Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

httpquery

Package Overview
Dependencies
Maintainers
1
Versions
15
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

httpquery - npm Package Compare versions

Comparing version 0.6.3 to 0.7.0

LICENSE-MIT.txt

5

CHANGES.md
# CHANGES to httpquery
## 0.7.0
- feat: Use any `req.jsonData` set by middleware
- refactor: Improve code readability
## 0.6.3

@@ -4,0 +9,0 @@

257

Node/index.js

@@ -1,3 +0,4 @@

// Todo: Make this integratable into a pipeline; ensure can use HTML or XML DOM with content-type accordingly
// Use JSDOM or http://zombie.labnotes.org/ ?
// Todo: More middleware passing options besides jsonData; ensure can use
// HTML or XML DOM with content-type accordingly
// Use JSDOM?

@@ -15,9 +16,111 @@ import {readFile} from 'fs/promises';

res.writeHead(code, responseHeaders);
res.end(fileContents); // + '\n'
res.end(fileContents); // + '\n'
};
/**
 * Checks whether the client advertised support for a query mechanism.
 *
 * Reads the `query-client-support` request header, treats its value as a
 * whitespace-separated list and looks for an exact match of `str`.
 * @param {{headers: Object<string, string>}} req Request-like object whose
 *   `headers` map is consulted
 * @param {string} str Mechanism name to look for (compared exactly)
 * @returns {boolean|string|undefined} `true`/`false` when the header has a
 *   truthy value; otherwise the falsy header value itself (`undefined` when
 *   the header is absent), so the result is meant to be used for truthiness
 */
const clientSupportCheck = (req, str) => {
  const advertised = req.headers['query-client-support'];
  // Short-circuit on a missing/empty header so `.trim()` is never called on
  // a non-string; the falsy value is returned as-is for backward compatibility
  return advertised && advertised.trim().split(/\s+/u).includes(str);
};
/**
 * Evaluates the JSONata expression from the `query-jsonata` request header
 * and hands the JSON-stringified result to `finish`.
 *
 * The input document is `req.jsonData` when that property exists on the
 * request (e.g., set by middleware); otherwise `fileContents` is decoded as
 * UTF-8 and parsed as JSON. Optional variable bindings are read as JSON from
 * the `query-bindings` header.
 *
 * Any failure -- while compiling the expression, parsing the input or
 * bindings, or during evaluation -- is reported through `exitError` with the
 * error's message; `finish` is not called in that case.
 * @param {object} cfg
 * @param {{headers: Object<string, string>, jsonData?: any}} cfg.req
 * @param {object} cfg.res Passed through to `exitError`
 * @param {object} cfg.responseHeaders Passed through to `exitError`
 * @param {{toString: (encoding: string) => string}} [cfg.fileContents] Only
 *   read when `req` has no `jsonData` property
 * @param {(res: object, responseHeaders: object, message: string) => void
 * } cfg.exitError
 * @param {(queryResult: string) => void} cfg.finish
 * @returns {void}
 */
const handleJsonata = ({
  req, res, responseHeaders, fileContents, exitError, finish
}) => {
  let jsonataExpression;
  let input;
  let bindings;
  try {
    // The expression and bindings come straight from client headers, so
    // compiling/parsing them can throw synchronously; report that the same
    // way as an evaluation error instead of letting it escape to the caller
    jsonataExpression = jsonata(
      req.headers['query-jsonata'].trim()
    );
    const rawBindings = req.headers['query-bindings']?.trim();
    input = 'jsonData' in req
      ? req.jsonData
      : JSON.parse(fileContents.toString('utf8'));
    bindings = rawBindings
      ? JSON.parse(rawBindings)
      : {};
  } catch (err) {
    exitError(res, responseHeaders, err.message);
    return;
  }

  // Kept outside the `try` so an exception thrown by `finish` itself is not
  // misreported as a query error
  jsonataExpression.evaluate(
    input,
    bindings,
    // eslint-disable-next-line promise/prefer-await-to-callbacks -- jsonata
    (error, result) => {
      if (error) {
        exitError(res, responseHeaders, error.message);
        return;
      }
      finish(JSON.stringify(result));
    }
  );
};
/**
 * Runs the XPath 1 expression from the `query-xpath1` request header against
 * `fileContents` parsed with `xmldom`'s `DOMParser`.
 *
 * Each selected node is serialized with its own `toString()`. With
 * `forceJSON` the array of serialized nodes is returned; otherwise the
 * strings are concatenated without a separator and passed through
 * `wrapFragment`.
 * @param {object} cfg
 * @param {{headers: Object<string, string>}} cfg.req
 * @param {(frag: string) => string} cfg.wrapFragment
 * @param {any} cfg.fileContents Coerced with `String()` before parsing
 * @param {boolean} cfg.forceJSON
 * @returns {string[]|string} Array of serialized nodes when `forceJSON`,
 *   otherwise whatever `wrapFragment` returns
 */
const handleXpath1 = ({
  req, wrapFragment, fileContents, forceJSON
}) => {
  const parser = new xmldom.DOMParser();
  const doc = parser.parseFromString(String(fileContents));

  const header = req.headers['query-xpath1'];
  const expression = header && header.trim();

  const serializedNodes = xpath.select(expression, doc).map(
    (node) => node.toString()
  );

  if (forceJSON) {
    return serializedNodes;
  }
  return wrapFragment(serializedNodes.join(''));
};
/**
 * Runs the CSS selector from the `query-css3` request header against
 * `fileContents` loaded with cheerio.
 *
 * The selector may end in one of two custom suffixes (todo: do these as
 * two-colon pseudo-elements instead):
 * - `:attr(name)` -- returns `.attr(name)` of the selection
 * - `:text()` -- returns `.text()` of the selection
 *
 * Without a suffix, the matched elements are serialized with `$.html()`:
 * as an array when `forceJSON` is set, otherwise joined without a separator
 * and passed through `wrapFragment`.
 * @param {object} cfg
 * @param {{headers: Object<string, string>}} cfg.req
 * @param {any} cfg.fileContents Coerced with `String()` before loading
 * @param {boolean} cfg.forceJSON
 * @param {(frag: string) => string} cfg.wrapFragment
 * @returns {any} Attribute value, text, array of serialized elements, or
 *   the wrapped serialized fragment, depending on the mode above
 */
const handleCSS3 = ({req, fileContents, forceJSON, wrapFragment}) => {
  const $ = cheerio.load(String(fileContents));

  // Selector, optionally followed by `:text()` or `:attr(...)`
  // (Allow explicit "html" (toString) or "toArray" (or "json")?)
  // eslint-disable-next-line unicorn/no-unsafe-regex -- Todo
  const requestPattern = /(.*?)(?::(text|attr)\(([^)]*)\))?$/u;
  const header = req.headers['query-css3'];
  const parts = (header && header.trim().match(requestPattern)) || [];

  const selector = parts[1];
  const attrName = parts[3];
  let mode = parts[2];
  if (mode === undefined) {
    mode = forceJSON ? 'toArray' : 'toString';
  }

  const serializeEach = (matched) => {
    return [...matched.map((idx, elem) => {
      return $.html(elem);
    })];
  };

  if (mode === 'attr') {
    // Only gets one attribute anyways, so JSON needs no special handling
    // here (the caller stringifies)
    return $(selector).attr(attrName);
  }
  if (mode === 'text') {
    // Todo: Change 'text' to return array of text nodes in case of JSON?
    return $(selector).text();
  }
  if (mode === 'toArray') {
    return serializeEach($(selector));
  }
  // 'toString': merge the serialized elements, then wrap as needed
  return wrapFragment(serializeEach($(selector)).join(''));
};
/**

@@ -34,3 +137,6 @@ * @param {PlainObject} [cfg]

const errorMessage = debug ? err : 'ERROR';
write(res, 404, responseHeaders, '<div style="color:red;font-weight:bold">' + errorMessage + '</div>');
write(
res, 404, responseHeaders,
`<div style="color:red;font-weight:bold">${errorMessage}</div>`
);
};

@@ -62,2 +168,3 @@

const forceJSON = req.headers['query-format'] === 'json';
const resultContentType = isXHTML

@@ -74,7 +181,10 @@ ? 'application/xhtml+xml'

: 'text/html';
const responseHeaders = {
'Content-Type': isJSON || forceJSON ? 'application/json' : resultContentType
'Content-Type': isJSON || forceJSON
? 'application/json'
: resultContentType
};
const finish = () => {
const finish = (queryResult) => {
fileContents = forceJSON ? JSON.stringify(queryResult) : queryResult;

@@ -90,5 +200,9 @@

url = url.replace(/(\/|\/\?.*)$/u, '/index.html').replace(/\?.*$/u, '') || 'index.html';
// url = require('url').parse(url).pathname; // Need to strip off request parameters?
url = url.replace(/(\/|\/\?.*)$/u, '/index.html').replace(/\?.*$/u, '') ||
'index.html';
// Need to strip off request parameters?
// url = require('url').parse(url).pathname;
// console.log('url:'+url);
if (forceJSON) {

@@ -98,6 +212,13 @@ responseHeaders['query-content-type'] = resultContentType;

if (req.headers['query-client-support'] && !req.headers['query-xpath1'] && !req.headers['query-css3'] && !req.headers['query-full-request']) {
if (
req.headers['query-client-support'] && !req.headers['query-xpath1'] &&
!req.headers['query-css3'] && !req.headers['query-full-request']
) {
responseHeaders['query-server-support'] = 'xpath1, css3, jsonata';
write(res, 200, responseHeaders, ''); // Don't waste bandwidth if client supports protocol and hasn't asked us to deliver the full document
// Todo: we should allow delivery of a default payload (e.g., full doc if not specified as requesting empty for feature detection+immediate execution if supported)
// Don't waste bandwidth if client supports protocol and hasn't asked
// us to deliver the full document
write(res, 200, responseHeaders, '');
// Todo: we should allow delivery of a default payload (e.g., full
// doc if not specified as requesting empty for feature detection +
// immediate execution if supported)
} else {

@@ -113,88 +234,58 @@ responseHeaders['query-server-support'] = 'xpath1, css3, jsonata';

let fileContents;
try {
fileContents = await readFile(join(cwd, path, url));
} catch (err) {
exitError(res, responseHeaders, err.message);
return;
if (!('jsonData' in req)) {
try {
fileContents = await readFile(join(cwd, path, url));
} catch (err) {
exitError(res, responseHeaders, err.message);
return;
}
}
const wrapFragment = (frag) => {
if (isHTML) { // || queryResult.length <= 1) { // No need to wrap for HTML or single result sets as no well-formedness requirements
if (isHTML) {
// No need to wrap for HTML or single result sets as no
// well-formedness requirements
// || queryResult.length <= 1) {
return frag;
}
const tag = 'div xmlns="http://www.w3.org/1999/xhtml"';
return '<' + tag + '>' + frag + '</' + tag.match(/^\w*/u)[0] + '>';
return `<${tag}>${frag}</${
tag.match(/^\w*/u)[0]
}>`;
};
let queryResult;
if ((ignoreQuerySupport || clientJSONPathSupport) && req.headers['query-jsonata'] && !req.headers['query-full-request']) {
const jsonataExpression = jsonata(
req.headers['query-jsonata'].trim()
);
const bindings = req.headers['query-bindings']?.trim();
jsonataExpression.evaluate(
JSON.parse(fileContents.toString('utf8')),
bindings ? JSON.parse(bindings) : {},
// eslint-disable-next-line promise/prefer-await-to-callbacks -- jsonata API
(error, result) => {
if (error) {
exitError(res, responseHeaders, error.message);
return;
}
queryResult = JSON.stringify(result);
finish();
}
);
if (
(ignoreQuerySupport || clientJSONPathSupport) &&
req.headers['query-jsonata'] && !req.headers['query-full-request']
) {
handleJsonata({
req, res, responseHeaders, fileContents, exitError, finish
});
return;
} else if ((ignoreQuerySupport || clientXPath1Support) && req.headers['query-xpath1'] && !req.headers['query-full-request']) {
const nodeArrayToSerializedArray = (arr) => {
return arr.map((node) => {
return node.toString();
});
};
const doc = new xmldom.DOMParser().parseFromString(String(fileContents));
const xpath1Request = req.headers['query-xpath1'] && req.headers['query-xpath1'].trim(); // || '//b[position() > 1 and position() < 4]'; // || '//b/text()',
queryResult = xpath.select(xpath1Request, doc);
queryResult = forceJSON ? nodeArrayToSerializedArray(queryResult) : wrapFragment(nodeArrayToSerializedArray(queryResult).join(''));
} else if ((ignoreQuerySupport || clientCSS3Support) && req.headers['query-css3'] && !req.headers['query-full-request']) {
// Support our own custom :text() and :attr(...) pseudo-classes (todo: do as (two-colon) pseudo-elements instead)
const $ = cheerio.load(String(fileContents));
// eslint-disable-next-line unicorn/no-unsafe-regex -- Todo
const css3RequestFull = req.headers['query-css3'] && req.headers['query-css3'].trim().match(/(.*?)(?::(text|attr)\(([^)]*)\))?$/u); // Allow explicit "html" (toString) or "toArray" (or "json")?
const css3Request = css3RequestFull[1];
const type = css3RequestFull[2] || (forceJSON ? 'toArray' : 'toString');
const css3Attr = css3RequestFull[3];
}
const nodeArrayToSerializedArray = (items) => {
/* return arr.map((node) => {
return node; //.html();
}); */
return [...items.map((i, elem) => {
return $.html(elem);
})];
};
switch (type) {
case 'attr': // Only gets one attribute anyways, so no need to handle differently for JSON (except the stringify below)
queryResult = $(css3Request).attr(css3Attr);
break;
case 'toArray':
queryResult = nodeArrayToSerializedArray($(css3Request)); // $(css3Request).toString(); handles merging
break;
// Todo: Change 'text' to return array of text nodes in case of JSON?
case 'text':
queryResult = $(css3Request)[type]();
break;
case 'toString':
default:
queryResult = $(css3Request); // Don't merge with next line as intermediate queryResult may be needed by wrapFragment
queryResult = wrapFragment(nodeArrayToSerializedArray(queryResult).join('')); // $(css3Request).toString(); handles merging
break;
}
if (
// XPATH 1
(ignoreQuerySupport || clientXPath1Support) &&
req.headers['query-xpath1'] && !req.headers['query-full-request']
) {
queryResult = handleXpath1({
req, wrapFragment, fileContents, forceJSON
});
} else if (
// CSS3
(ignoreQuerySupport || clientCSS3Support) &&
req.headers['query-css3'] && !req.headers['query-full-request']
) {
queryResult = handleCSS3({
req, fileContents, forceJSON, wrapFragment
});
} else {
// Text
queryResult = fileContents.toString('utf8');
}
finish();
finish(queryResult);
};

@@ -201,0 +292,0 @@ }

{
"name": "httpquery",
"version": "0.6.3",
"version": "0.7.0",
"author": "Brett Zamir",

@@ -84,3 +84,3 @@ "contributors": [],

},
"readme": "# httpquery\n\n***Note that this is still in alpha stages, so may be unstable or\nnon-functional***\n\n*HTTP query protocol with proof-of-concept implementations obtaining\nsubsets of remote HTML data via XPath or CSS Selectors, essentially\nproviding the likes of a native XML database, but without need for any\nimporting of data (the server will simply read your static HTML/XML files\non demand and deliver a subset of this data as queried by the user\nor application).*\n\n## CLI\n\n![cli.svg](https://raw.githubusercontent.com/brettz9/httpquery/master/cli.svg?sanitize=true)\n\n## Components\n\nHTTPQuery is an *experimental* protocol with the following tools:\n* Proof-of-concept **Firefox addon** (web app to\ncome) to allow remote HTTPQueries without access\nrestrictions\n* **Node.js and PHP server file handler implementations** to allow remote queries to\nbe made to obtain subsets of HTML or XML data via XPath or CSS selector\nsyntax (currently XPath is for XML/XHTML only; CSS Selectors for HTML only?).\nStatic HTML/XML files can be read from the desktop before\nbeing transformed by the client-submitted XPath\nor CSS Selectors query. Files with extension \"html\", \"xhtml\", \"xml\",\nor \"tei\" are currently recognized (files placed within the respective\nserver subfolder (\"Node\" or \"PHP\")). Please see their respective README's.\n\nA PHP demo server is also planned.\n\n## INTRODUCTION (IMPORTANT)\n\nDespite the fact that the ubiquitous files of the web, HTML files, are\nTHEMSELVES databases, there has been a curious lack of ability\nto query these files without first needing to enter their contents into\na database or for a consumer to be forced to download the entire\nfile and then obtain the subset they desire. 
Even when\ntime has been taken to enter file contents into a database, users\nare often hamstrung by developer decisions, as they are not usually\nempowered to run arbitrary queries.\n\nThis HTTP Query protocol, with reference Node.js and Firefox client\nimplementation are meant to provide users and developers with\na means to overcome these barriers and limitations by letting your\nusers by default query any document that you allow in the manner\nthey wish, and with the default behavior allowing you to keep your\ndata in simple static files, such as arbitrary HTML files or, on\nthe other hand, HTML files shaped in a manner more similar\nin structure to traditional simpler databases (e.g., an HTML\nfile consisting solely of a single table, hierarchical list, etc.).\n\nOther possible uses may include selective spidering.\n\n**Note that as mentioned the protocol syntax as well as tools are still\nvery much experimental and are used at your own risk. Allowing\narbitrary XPath or CSS Selector syntax may present some\nincreased risk of DDOS attacks.**\n\nThe Web IS a database, and it is about time that\nits data becomes opened--for the humblest content creator\nto experienced mashup developers.\n\n## Future goals (general)\n\nWhile the first goal is to allow regular website content creators to\nhave their content available to searches--with HTML/XML being\nthe inevitable document-centric format, JSON support (via\nJSONPath / RQL?) 
is also envisaged.\n\nIt is also hoped, whether through minor markup changes to schema\nattachment, intelligent widgets may become more of a norm in exposing\nsophisticated, offlineable, type-aware and paginated widgets which do\nnot depend on the content creator being themselves a developer for\nthis functionality to be made available to users.\n\nSee the todos for more future goals for the project.\n\n## FAQ\n\n- *Why require headers rather than GET-friendly bookmarkable/shareable request parameters?* - I wanted the protocol to be able to overlay any dynamic as well as static system which might already be using its own request parameters. However, I would like to see a non-HTTP web protocol be created to work with these headers.\n\n- *If I generate my data dynamically (e.g., because I have files too large to be efficiently queried against a static file), how is the protocol still useful?* - The query mechanism and API will still be reusable by local apps (or remote ones such as the Firefox add-on if the server is enabled in a manner like the included Node server), code libraries, etc., even if you do not wish to restrict yourself to static files. For example, even though your API might filter the raw data as it is, an HTTPQuery could be allowed to run on top of that filtered data.\n\n- *Why not use OData?* - While OData has pioneered work in this direction, it is hoped that this simple protocol will gain support and allow\npiecemeal selection of content in a manner reusable by servers and clients with an absolutely bare minimum of effort by content\ncreators (and even implementers).\n\n## Informal, tentative specification for HTTP Query headers\n\n1. The client MAY submit a **query-client-support** header including a whitespace-separated list of supported query mechanisms (currently `xpath1` and `css3`). The HTTPQuery server MUST NOT require this header when other HTTPQuery queries are supplied. 
(The server MAY utilize the client support header to display minimal content by default since the client user is assumed to be familiar with his own browser's capabilities in utilizing the protocol to query only what he needs. The header **query-full-request** MAY be submitted (instead or in addition) by the client to counter-act this assumption to display minimal content. If the client wishes to make the request for minimal data explicit, it can make a HEAD request.)\n2. The server SHOULD advertise **query-server-support** with a comma-separated list of supported query types (currently `xpath1`, `css3`, and `jsonata`) before specific queries are made and MUST advertise the header when queries are successfully returned (and SHOULD return the header if there is a failure). This information MAY be used by clients to inform users of the query mechanisms available to them for the site.\n3. Requests are made by headers of the form, \"query-request-<QUERY MECHANISM>\". Clients and servers should support **query-request-xpath1** and **query-request-css3** and MAY support other custom mechanisms.\n4. Since queries may return node sets, the question arises as to how to group nodes in the results. In the case of normal HTML payloads, a query-supporting server MUST join together XPath1 and CSS3 query results as a string and without a separator between elements. In the case of normal XML payloads, since well-formedness will typically be expected and it is possible that more than one item is returned (i.e., without a single root node), a query-supporting server MUST wrap the resulting XML element(s) within a `div` element in the XHTML namespace (i.e., within `<div xmlns=\"http://www.w3.org/1999/xhtml\"></div>`). 
The query-supporting server of XPath1 or CSS3 queries MUST also support the ability to recognize an additional client-supplied header, **query-format** set to the value `json` which will deliver the XML or HTML results in the JSON format while also recognizing the header **query-content-type** which will indicate the content-type of the wrapped fragments (i.e., text/html or an XML MIME type) as distinct from the regular **Content-Type** header which for JSON should be `application/json`.\n5. The query-supporting server for CSS3 queries MUST support two extensions described below for obtaining an attribute value or text nodes. In such cases, the format will be a string. The query-supporting server of such queries MUST also support the ability to recognize an additional client-supplied header, **query-format** set to the value `json` so as to deliver the string in JSON format. A **query-content-type** response header MAY be provided if set to `text/plain`. (Headers may be added in the future to distinguish whether JSON delivery should concatenate text node results into a single string or not.)\n\n## CSS Selector modifications\n\nThe CSS Selector syntax has been modified to include the following\npseudo-classes:\n\n* **attr(...)** - Grab the actual attribute content (of the first attribute\nin the node set). This is necessary since attribute selectors are used\nin CSS to target elements rather than attributes.\n* **text()** - Grab the text nodes within the node set\n\n## Comparison with OData\n\nHTTP Query is a much lighter protocol. HTTP Query does hope to eventually support modification as does OData,\nbut in a web-friendly, hierarchical manner such as with https://github.com/kriszyp/put-selector.\n\n(INCOMPLETE)\n\n## Ideas for possible future todos\n\n1. i18n-ize\n1. Add tests (especially ensuring content-type works properly with each mode)\n1. 
Add an Ajax site-independent web application, including ability to supply arbitrary URLs with cross-site headers or making AsYouWish requests (would be facilitated by https://bugzilla.mozilla.org/show_bug.cgi?id=880908 ; see also https://bugzilla.mozilla.org/show_bug.cgi?id=855936 )\n * Do demos against HTML tables, HTML Microdata, TEI (XML)\n1. Server todos:\n * Make the Node.js implementation wrappable for use with other existing dynamic code.\n * Make the PHP implementation more easily wrappable for use with dynamic code.\n * Contemplate what error/code to return (instead of full text) if user submits query header but not supported\n * Get XPath to work with HTML DOM and get CSS Selectors to work with XML (if it cannot already)?); test on (Linux) environment with jsdom\n * Fix local xpath query \"//a/text()\" or \"//a/@href\" (ORDERED_NODE_SNAPSHOT_TYPE === 7 is ok with arrays but not these apparently)\n * Allow CSS3 :text() nodes to be returned as an array of nodes for JSON (local and remote); allow explicit :html() ?\n * Get server to resolve new HTML Includes (or XInclude's) (and entities?) server-side before performing queries\n * Support by cross-domain access by default (since presence of headers already implies at least some flexibility in querying)?\n * Ability to send Relative XPaths (or CSS Sel.), so if file really big, can start at a certain point\n * Store user access in simple text file and use to check along with BrowserID (not related to protocol but another \"powerful-by-default\" feature)\n * Tool to auto-generate XML schema for SQL database table along with a single raw `<table>` export URL (but only enabling downloading\n within limits (see limits below); XPath/CSS Selectors (or paginating query mechanism, etc.) can then be translated back into equivalent SQL.\n1. Add-on todos:\n * Confirm why queries aren't working for some sites and respond accordingly? 
(e.g., Yahoo and StackOverflow are detecting automatic Ajax header?)\n * Allow JSON format to be displayed as actual application/json content-type (and XML as application/xml)\n * Query input\n * XPath (or CSS Sel.) syntax coloring? (also update regex coloring for CodeMirror!)\n * XPath (or CSS Sel.) with auto-complete based on header-associated schema (including for HTML-treated-as-XML?) or at least general awareness of language/content-type (HTML/XML)\n * Page-specific preferences on whether to send appropriate headers to load HTTPQuery-supporting sites as empty (or full) by default (instead of possible Ajax pagination by the server); selectively advertise support headers (or at least minimize types on which the \"http-on-modify-request\" header is sent)?\n1. Protocol enhancements\n * JSON support (via JSONPath / RQL?)\n * Schema attachment/markup enhancements for intelligent, type-aware, paginated, offlineable widgets:\n * Schema attachment (or markup) used by browser (or server) to make suitable query interface\n * Server indicates header-specified RelaxNG, Schematron for starters, and browser delivers simultaneously with content if possible\n * Schema-awareness by browser to transform current document into queryable doc could work even if doc only partly loaded (or offline)\n * Types:\n 1. Tables\n * Browser displays the requested data inline with already-loaded data, or as requested by user (for file download, separate dialog, etc.?)\n * Allow mashable plugins, e.g., for user providing their own Excel-like automated columns (e.g., if user wanted all tables to allow a given column's data to be translated word-for-word and added as a new column)\n 1. Lists\n * Hierarchical drill-down for browsing and search; also as requested by user (for file download, separate dialog, etc.?)\n 1. 
Numbered paragraphs\n * Detect paragraph elements within a file and auto-number them (or use an attribute)--e.g., TEI's `<p n=\"\">` for an automatic\n paragraph range selection interface\n 1. Arbitrary but type-aware queries (e.g., use a date selector for finding all dates within a range (of any element or a given element anywhere in the document)\n * Allow both browser-side and server-side overlays (strip at least some markup server-side if handling server-side so client doesn't try to redo); might use headers to detect whether to let user use their own browser-supplied one or some Ajax-based, simulating widget; use Custom Elements?\n * Web-based IDE (WIDE) to integrate with CKEditor/CodeMirror allowing inline querying and modification of data for a given large document without needing to load it all into the IDE view unless desired. Schema-driven input could also facilitate more common use of schemas with the query protocol (e.g., the schema for RelaxNG or Schematron could provide auto-complete or XSL on a schema could build a form for input).\n * WYSIWYG table editor to allow adding of types (as well as max, starting point, etc.), so average users can create databases (and schematic info) easily in HTML\n * Some kind of auto-update mechanism for offline storage? (OData ideas?)\n * Limits\n * Client-side size limits - e.g., normally download full load for offline caching (a particular site?) unless over 200 MB/table, etc.\n * Server indicates support limitations (e. g., size limits, max rows/request (page) for tables, lists, etc.) and server ignores if user disregards\n * Allow server (or browser?) to read header or markup provided XPointers to find only specific elements supporting querying/pagination, etc. 
and with their limits\n * Possible default behaviors: avoid resolving includes?, default row count/size per server, or page-specified suggestions for partial loading and query points)\n * If the HTML is already database-generated, the server could use its own default number of rows/records/size\n * Offline\n * Coordinate full (or even partial) delivery for offline caching and querying (with automatic detection of offline mode, but also option to query offline even if online)\n * Ensure offline storage works with data added after (and before) page load\n * Add-on to allow any page stored for offline use (and cached in user-selected collections); ensure one can also store results when making selective queries\n * (XQuery/XSL/XProc or) jQuery-like syntax for more developer or user-driven complex, server-side reshaping (along with XPath/CSS Selectors) including mashups, though this presents even more challenges re: security\n * Include ability to include & mix other sources declaratively yet query together - e.g., protocol to send current doc to XSL as param to show automated cols\n * Allow data modification, e.g., something friendly like https://github.com/kriszyp/put-selector\n * Create corresponding bookmarkable/shareable protocol (e.g., `query:`) to request and reshape foreign sites with user permission\n * Integrate into privileged AsYouWish HTML pages\n * Add jQuery-like syntax option into add-on dialog with option to save as ayw-HTML (or create HTML content-type based on JS alone without <script></script>) (and then do for my own JML HTML-as-JSON content-type)\n * Other related protocols\n * Implement a related file search protocol to search all files in a folder, etc. (On the desktop, see an analogous proposal for Firefox desktop search, at https://bugzilla.mozilla.org/show_bug.cgi?id=878626 . Implement via Gopher (or METS)-like protocol? 
Check for <link/> to advertise support and thereby show interface?\n * Consider headers/protocols where you can get just what you want (e.g., Gopher, XMPP Data Forms), but with option for author to surround with arbitrary HTML\n"
"readme": "# httpquery\n\n***Note that this is still in alpha stages, so may be unstable or\nnon-functional; the PHP and add-on, in particular, are not currently\nfunctional.***\n\n*HTTP query protocol with proof-of-concept implementations obtaining\nsubsets of remote HTML data via XPath or CSS Selectors, essentially\nproviding the likes of a native XML database, but without need for any\nimporting of data (the server will simply read your static HTML/XML files\non demand and deliver a subset of this data as queried by the user\nor application).*\n\n## CLI\n\n![cli.svg](https://raw.githubusercontent.com/brettz9/httpquery/master/cli.svg?sanitize=true)\n\n## Components\n\nHTTPQuery is an *experimental* protocol with the following tools:\n- Proof-of-concept **Firefox addon** (web app to\ncome) to allow remote HTTPQueries without access\nrestrictions\n- **Node.js and PHP server file handler implementations** to allow remote\nqueries to be made to obtain subsets of HTML or XML data via XPath or CSS\nselector syntax (currently XPath is for XML/XHTML only; CSS Selectors for\nHTML only?). Static HTML/XML files can be read from the desktop before\nbeing transformed by the client-submitted XPath or CSS Selectors query.\nFiles with extension \"html\", \"xhtml\", \"xml\", or \"tei\" are currently recognized\n(files placed within the respective server subfolder (\"Node\" or \"PHP\")).\nPlease see their respective README's.\n\nA PHP demo server is also planned.\n\n## INTRODUCTION (IMPORTANT)\n\nDespite the fact that the ubiquitous files of the web, HTML files, are\nTHEMSELVES databases, there has been a curious lack of ability\nto query these files without first needing to enter their contents into\na database or for a consumer to be forced to download the entire\nfile and then obtain the subset they desire. 
Even when\ntime has been taken to enter file contents into a database, users\nare often hamstrung by developer decisions, as they are not usually\nempowered to run arbitrary queries.\n\nThis HTTP Query protocol, with reference Node.js and Firefox client\nimplementation are meant to provide users and developers with\na means to overcome these barriers and limitations by letting your\nusers by default query any document that you allow in the manner\nthey wish, and with the default behavior allowing you to keep your\ndata in simple static files, such as arbitrary HTML files or, on\nthe other hand, HTML files shaped in a manner more similar\nin structure to traditional simpler databases (e.g., an HTML\nfile consisting solely of a single table, hierarchical list, etc.).\n\nOther possible uses may include selective spidering.\n\n**Note that as mentioned the protocol syntax as well as tools are still\nvery much experimental and are used at your own risk. Allowing\narbitrary XPath or CSS Selector syntax may present some\nincreased risk of DDOS attacks.**\n\nThe Web IS a database, and it is about time that\nits data becomes opened--for the humblest content creator\nto experienced mashup developers.\n\n## Future goals (general)\n\nWhile the first goal is to allow regular website content creators to\nhave their content available to searches--with HTML/XML being\nthe inevitable document-centric format, JSON support (via\nJSONPath / RQL?) 
is also envisaged.\n\nIt is also hoped, whether through minor markup changes to schema\nattachment, intelligent widgets may become more of a norm in exposing\nsophisticated, offlineable, type-aware and paginated widgets which do\nnot depend on the content creator being themselves a developer for\nthis functionality to be made available to users.\n\nSee the todos for more future goals for the project.\n\n## FAQ\n\n- *Why require headers rather than GET-friendly bookmarkable/shareable*\n *request parameters?* - I wanted the protocol to be able to overlay any\n dynamic as well as static system which might already be using its own\n request parameters. However, I would like to see a non-HTTP web protocol\n be created to work with these headers.\n\n- *If I generate my data dynamically (e.g., because I have files too large*\n *to be efficiently queried against a static file), how is the protocol*\n *still useful?* - The query mechanism and API will still be reusable by\n local apps (or remote ones such as the Firefox add-on if the server is\n enabled in a manner like the included Node server), code libraries, etc.,\n even if you do not wish to restrict yourself to static files. For example,\n even though your API might filter the raw data as it is, an HTTPQuery could\n be allowed to run on top of that filtered data.\n\n- *Why not use OData?* - While OData has pioneered work in this direction,\n it is hoped that this simple protocol will gain support and allow piecemeal\n selection of content in a manner reusable by servers and clients with an\n absolutely bare minimum of effort by content creators (and even\n implementers).\n\n## Informal, tentative specification for HTTP Query headers\n\n1. The client MAY submit a **query-client-support** header including a\n whitespace-separated list of supported query mechanisms (currently `xpath1`\n and `css3`). The HTTPQuery server MUST NOT require this header when other\n HTTPQuery queries are supplied. 
(The server MAY utilize the client support\n header to display minimal content by default since the client user is assumed\n to be familiar with his own browser's capabilities in utilizing the protocol\n to query only what he needs. The header **query-full-request** MAY be\n submitted (instead or in addition) by the client to counter-act this\n assumption to display minimal content. If the client wishes to make the\n request for minimal data explicit, it can make a HEAD request.)\n2. The server SHOULD advertise **query-server-support** with a comma-separated\n list of supported query types (currently `xpath1`, `css3`, and `jsonata`)\n before specific queries are made and MUST advertise the header when queries\n are successfully returned (and SHOULD return the header if there is a\n failure). This information MAY be used by clients to inform users of the\n query mechanisms available to them for the site.\n3. Requests are made by headers of the form, \"query-request-<QUERY MECHANISM>\".\n Clients and servers should support **query-request-xpath1** and\n **query-request-css3** and MAY support other custom mechanisms.\n4. Since queries may return node sets, the question arises as to how to group\n nodes in the results. In the case of normal HTML payloads, a query-supporting\n server MUST join together XPath1 and CSS3 query results as a string and\n without a separator between elements. In the case of normal XML payloads,\n since well-formedness will typically be expected and it is possible that\n more than one item is returned (i.e., without a single root node), a\n query-supporting server MUST wrap the resulting XML element(s) within a\n `div` element in the XHTML namespace (i.e., within\n `<div xmlns=\"http://www.w3.org/1999/xhtml\"></div>`). 
The query-supporting\n server of XPath1 or CSS3 queries MUST also support the ability to recognize\n an additional client-supplied header, **query-format** set to the value\n `json` which will deliver the XML or HTML results in the JSON format\n while also recognizing the header **query-content-type** which will\n indicate the content-type of the wrapped fragments (i.e., text/html or an\n XML MIME type) as distinct from the regular **Content-Type** header\n which for JSON should be `application/json`.\n5. The query-supporting server for CSS3 queries MUST support two extensions\n described below for obtaining an attribute value or text nodes. In such\n cases, the format will be a string. The query-supporting server of such\n queries MUST also support the ability to recognize an additional\n client-supplied header, **query-format** set to the value `json` so as to\n deliver the string in JSON format. A **query-content-type** response header\n MAY be provided if set to `text/plain`. (Headers may be added in the future\n to distinguish whether JSON delivery should concatenate text node results\n into a single string or not.)\n\n## CSS Selector modifications\n\nThe CSS Selector syntax has been modified to include the following\npseudo-classes:\n\n- **attr(...)** - Grab the actual attribute content (of the first attribute\nin the node set). This is necessary since attribute selectors are used\nin CSS to target elements rather than attributes.\n- **text()** - Grab the text nodes within the node set\n\n## Comparison with OData\n\nHTTP Query is a much lighter protocol. HTTP Query does hope to eventually\nsupport modification as does OData, but in a web-friendly, hierarchical manner\nsuch as with <https://github.com/kriszyp/put-selector>.\n\n(INCOMPLETE)\n\n## Ideas for possible future todos\n\n1. Support **[JSONiq](https://www.jsoniq.org/)** (XQuery-like power for JSON)\n1. Support **XQuery** (via\n [https://github.com/FontoXML/fontoxpath](fontoxpath)?)\n1. 
**Restore add-on** (as webextension) so can make queries and joins even to\n non-httpquery sites!\n1. i18n-ize\n1. Add tests (especially ensuring content-type works properly with each mode)\n1. Add an Ajax site-independent web application, including ability to supply\n arbitrary URLs with cross-site headers or making AsYouWish requests (would\n be facilitated by <https://bugzilla.mozilla.org/show_bug.cgi?id=880908>; see\n also <https://bugzilla.mozilla.org/show_bug.cgi?id=855936>)\n - Do demos against HTML tables, HTML Microdata, TEI (XML)\n1. Server todos:\n - Make the Node.js implementation wrappable for use with other existing\n dynamic code.\n - Make the PHP implementation more easily wrappable for use with dynamic\n code.\n - Contemplate what error/code to return (instead of full text) if user\n submits query header but not supported\n - Get XPath to work with HTML DOM and get CSS Selectors to work with XML\n (if it cannot already)?); test on (Linux) environment with jsdom\n - Fix local xpath query \"//a/text()\" or \"//a/@href\"\n (ORDERED_NODE_SNAPSHOT_TYPE === 7 is ok with arrays but not these\n apparently)\n - Allow CSS3 :text() nodes to be returned as an array of nodes for JSON\n (local and remote); allow explicit :html() ?\n - Get server to resolve new HTML Includes (or XInclude's) (and entities?)\n server-side before performing queries\n - Support by cross-domain access by default (since presence of headers\n already implies at least some flexibility in querying)?\n - Ability to send Relative XPaths (or CSS Sel.), so if file really big,\n can start at a certain point\n - Store user access in simple text file and use to check along with\n BrowserID (not related to protocol but another \"powerful-by-default\"\n feature)\n - Tool to auto-generate XML schema for SQL database table along with a\n single raw `<table>` export URL (but only enabling downloading within\n limits (see limits below); XPath/CSS Selectors (or paginating query\n mechanism, etc.) 
can then be translated back into equivalent SQL.\n1. Add-on todos:\n - Confirm why queries aren't working for some sites and respond\n accordingly? (e.g., Yahoo and StackOverflow are detecting automatic\n Ajax header?)\n - Allow JSON format to be displayed as actual application/json\n content-type (and XML as application/xml)\n - Query input\n - XPath (or CSS Sel.) syntax coloring? (also update regex coloring for\n CodeMirror!)\n - XPath (or CSS Sel.) with auto-complete based on header-associated\n schema (including for HTML-treated-as-XML?) or at least general\n awareness of language/content-type (HTML/XML)\n - Page-specific preferences on whether to send appropriate headers to\n load HTTPQuery-supporting sites as empty (or full) by default (instead\n of possible Ajax pagination by the server); selectively advertise\n support headers (or at least minimize types on which the\n \"http-on-modify-request\" header is sent)?\n1. Protocol enhancements\n - JSON support (via JSONPath / RQL?)\n - Schema attachment/markup enhancements for intelligent, type-aware,\n paginated, offlineable widgets:\n - Schema attachment (or markup) used by browser (or server) to make\n suitable query interface\n - Server indicates header-specified RelaxNG, Schematron for\n starters, and browser delivers simultaneously with content\n if possible\n - Schema-awareness by browser to transform current document into\n queryable doc could work even if doc only partly loaded (or\n offline)\n - Types:\n 1. Tables\n - Browser displays the requested data inline with\n already-loaded data, or as requested by user (for file\n download, separate dialog, etc.?)\n - Allow mashable plugins, e.g., for user providing their\n own Excel-like automated columns (e.g., if user wanted\n all tables to allow a given column's data to be\n translated word-for-word and added as a new column)\n 1. 
Lists\n - Hierarchical drill-down for browsing and search; also\n as requested by user (for file download, separate\n dialog, etc.?)\n 1. Numbered paragraphs\n - Detect paragraph elements within a file and auto-number\n them (or use an attribute)--e.g., TEI's `<p n=\"\">` for\n an automatic\n paragraph range selection interface\n 1. Arbitrary but type-aware queries (e.g., use a date selector\n for finding all dates within a range (of any element or\n a given element anywhere in the document)\n - Allow both browser-side and server-side overlays (strip at\n least some markup server-side if handling server-side so client\n doesn't try to redo); might use headers to detect whether to\n let user use their own browser-supplied one or some Ajax-based,\n simulating widget; use Custom Elements?\n - Web-based IDE (WIDE) to integrate with CKEditor/CodeMirror\n allowing inline querying and modification of data for a given\n large document without needing to load it all into the IDE view\n unless desired. Schema-driven input could also facilitate more\n common use of schemas with the query protocol (e.g., the schema\n for RelaxNG or Schematron could provide auto-complete or XSL on\n a schema could build a form for input).\n - WYSIWYG table editor to allow adding of types (as well as\n max, starting point, etc.), so average users can create\n databases (and schematic info) easily in HTML\n - Some kind of auto-update mechanism for offline storage? (OData\n ideas?)\n - Limits\n - Client-side size limits - e.g., normally download full load\n for offline caching (a particular site?) unless over\n 200 MB/table, etc.\n - Server indicates support limitations (e. g., size limits,\n max rows/request (page) for tables, lists, etc.) and server\n ignores if user disregards\n - Allow server (or browser?) to read header or markup\n provided XPointers to find only specific elements\n supporting querying/pagination, etc. 
and with their\n limits\n - Possible default behaviors: avoid resolving includes?,\n default row count/size per server, or page-specified\n suggestions for partial loading and query points)\n - If the HTML is already database-generated, the server\n could use its own default number of rows/records/size\n - Offline\n - Coordinate full (or even partial) delivery for offline\n caching and querying (with automatic detection of offline\n mode, but also option to query offline even if online)\n - Ensure offline storage works with data added after (and\n before) page load\n - Add-on to allow any page stored for offline use (and cached\n in user-selected collections); ensure one can also store\n results when making selective queries\n - (XQuery/XSL/XProc or) jQuery-like syntax for more developer or\n user-driven complex, server-side reshaping (along with XPath/CSS\n Selectors) including mashups, though this presents even more challenges\n re: security\n - Include ability to include & mix other sources declaratively yet\n query together - e.g., protocol to send current doc to XSL as\n param to show automated cols\n - Allow data modification, e.g., something friendly like\n <https://github.com/kriszyp/put-selector>\n - Create corresponding bookmarkable/shareable protocol (e.g., `query:`) to\n request and reshape foreign sites with user permission\n - Integrate into privileged AsYouWish HTML pages\n - Add jQuery-like syntax option into add-on dialog with option to save\n as ayw-HTML (or create HTML content-type based on JS alone without\n `<script></script>`) (and then do for my own JML HTML-as-JSON\n content-type)\n - Other related protocols\n - Implement a related file search protocol to search all files in a\n folder, etc. (On the desktop, see an analogous proposal for Firefox\n desktop search, at\n <https://bugzilla.mozilla.org/show_bug.cgi?id=878626>. Implement via\n Gopher (or METS)-like protocol? 
Check for <link/> to advertise\n support and thereby show interface?\n - Consider headers/protocols where you can get just what you want\n (e.g., Gopher, XMPP Data Forms), but with option for author to\n surround with arbitrary HTML\n"
}
# httpquery
***Note that this is still in alpha stages, so may be unstable or
non-functional***
non-functional; the PHP and add-on, in particular, are not currently
functional.***

@@ -20,13 +21,13 @@ *HTTP query protocol with proof-of-concept implementations obtaining

HTTPQuery is an *experimental* protocol with the following tools:
* Proof-of-concept **Firefox addon** (web app to
- Proof-of-concept **Firefox addon** (web app to
come) to allow remote HTTPQueries without access
restrictions
* **Node.js and PHP server file handler implementations** to allow remote queries to
be made to obtain subsets of HTML or XML data via XPath or CSS selector
syntax (currently XPath is for XML/XHTML only; CSS Selectors for HTML only?).
Static HTML/XML files can be read from the desktop before
being transformed by the client-submitted XPath
or CSS Selectors query. Files with extension "html", "xhtml", "xml",
or "tei" are currently recognized (files placed within the respective
server subfolder ("Node" or "PHP")). Please see their respective README's.
- **Node.js and PHP server file handler implementations** to allow remote
queries to be made to obtain subsets of HTML or XML data via XPath or CSS
selector syntax (currently XPath is for XML/XHTML only; CSS Selectors for
HTML only?). Static HTML/XML files can be read from the desktop before
being transformed by the client-submitted XPath or CSS Selectors query.
Files with extension "html", "xhtml", "xml", or "tei" are currently recognized
(files placed within the respective server subfolder ("Node" or "PHP")).
Please see their respective READMEs.

@@ -84,17 +85,69 @@ A PHP demo server is also planned.

- *Why require headers rather than GET-friendly bookmarkable/shareable request parameters?* - I wanted the protocol to be able to overlay any dynamic as well as static system which might already be using its own request parameters. However, I would like to see a non-HTTP web protocol be created to work with these headers.
- *Why require headers rather than GET-friendly bookmarkable/shareable*
*request parameters?* - I wanted the protocol to be able to overlay any
dynamic as well as static system which might already be using its own
request parameters. However, I would like to see a non-HTTP web protocol
be created to work with these headers.
- *If I generate my data dynamically (e.g., because I have files too large to be efficiently queried against a static file), how is the protocol still useful?* - The query mechanism and API will still be reusable by local apps (or remote ones such as the Firefox add-on if the server is enabled in a manner like the included Node server), code libraries, etc., even if you do not wish to restrict yourself to static files. For example, even though your API might filter the raw data as it is, an HTTPQuery could be allowed to run on top of that filtered data.
- *If I generate my data dynamically (e.g., because I have files too large*
*to be efficiently queried against a static file), how is the protocol*
*still useful?* - The query mechanism and API will still be reusable by
local apps (or remote ones such as the Firefox add-on if the server is
enabled in a manner like the included Node server), code libraries, etc.,
even if you do not wish to restrict yourself to static files. For example,
even though your API might filter the raw data as it is, an HTTPQuery could
be allowed to run on top of that filtered data.
- *Why not use OData?* - While OData has pioneered work in this direction, it is hoped that this simple protocol will gain support and allow
piecemeal selection of content in a manner reusable by servers and clients with an absolutely bare minimum of effort by content
creators (and even implementers).
- *Why not use OData?* - While OData has pioneered work in this direction,
it is hoped that this simple protocol will gain support and allow piecemeal
selection of content in a manner reusable by servers and clients with an
absolutely bare minimum of effort by content creators (and even
implementers).
## Informal, tentative specification for HTTP Query headers
1. The client MAY submit a **query-client-support** header including a whitespace-separated list of supported query mechanisms (currently `xpath1` and `css3`). The HTTPQuery server MUST NOT require this header when other HTTPQuery queries are supplied. (The server MAY utilize the client support header to display minimal content by default since the client user is assumed to be familiar with his own browser's capabilities in utilizing the protocol to query only what he needs. The header **query-full-request** MAY be submitted (instead or in addition) by the client to counter-act this assumption to display minimal content. If the client wishes to make the request for minimal data explicit, it can make a HEAD request.)
2. The server SHOULD advertise **query-server-support** with a comma-separated list of supported query types (currently `xpath1`, `css3`, and `jsonata`) before specific queries are made and MUST advertise the header when queries are successfully returned (and SHOULD return the header if there is a failure). This information MAY be used by clients to inform users of the query mechanisms available to them for the site.
3. Requests are made by headers of the form, "query-request-<QUERY MECHANISM>". Clients and servers should support **query-request-xpath1** and **query-request-css3** and MAY support other custom mechanisms.
4. Since queries may return node sets, the question arises as to how to group nodes in the results. In the case of normal HTML payloads, a query-supporting server MUST join together XPath1 and CSS3 query results as a string and without a separator between elements. In the case of normal XML payloads, since well-formedness will typically be expected and it is possible that more than one item is returned (i.e., without a single root node), a query-supporting server MUST wrap the resulting XML element(s) within a `div` element in the XHTML namespace (i.e., within `<div xmlns="http://www.w3.org/1999/xhtml"></div>`). The query-supporting server of XPath1 or CSS3 queries MUST also support the ability to recognize an additional client-supplied header, **query-format** set to the value `json` which will deliver the XML or HTML results in the JSON format while also recognizing the header **query-content-type** which will indicate the content-type of the wrapped fragments (i.e., text/html or an XML MIME type) as distinct from the regular **Content-Type** header which for JSON should be `application/json`.
5. The query-supporting server for CSS3 queries MUST support two extensions described below for obtaining an attribute value or text nodes. In such cases, the format will be a string. The query-supporting server of such queries MUST also support the ability to recognize an additional client-supplied header, **query-format** set to the value `json` so as to deliver the string in JSON format. A **query-content-type** response header MAY be provided if set to `text/plain`. (Headers may be added in the future to distinguish whether JSON delivery should concatenate text node results into a single string or not.)
1. The client MAY submit a **query-client-support** header including a
whitespace-separated list of supported query mechanisms (currently `xpath1`
and `css3`). The HTTPQuery server MUST NOT require this header when other
HTTPQuery queries are supplied. (The server MAY utilize the client support
header to display minimal content by default since the client user is assumed
to be familiar with his own browser's capabilities in utilizing the protocol
to query only what he needs. The header **query-full-request** MAY be
submitted (instead or in addition) by the client to counteract this
assumption to display minimal content. If the client wishes to make the
request for minimal data explicit, it can make a HEAD request.)
2. The server SHOULD advertise **query-server-support** with a comma-separated
list of supported query types (currently `xpath1`, `css3`, and `jsonata`)
before specific queries are made and MUST advertise the header when queries
are successfully returned (and SHOULD return the header if there is a
failure). This information MAY be used by clients to inform users of the
query mechanisms available to them for the site.
3. Requests are made by headers of the form, "query-request-<QUERY MECHANISM>".
Clients and servers should support **query-request-xpath1** and
**query-request-css3** and MAY support other custom mechanisms.
4. Since queries may return node sets, the question arises as to how to group
nodes in the results. In the case of normal HTML payloads, a query-supporting
server MUST join together XPath1 and CSS3 query results as a string and
without a separator between elements. In the case of normal XML payloads,
since well-formedness will typically be expected and it is possible that
more than one item is returned (i.e., without a single root node), a
query-supporting server MUST wrap the resulting XML element(s) within a
`div` element in the XHTML namespace (i.e., within
`<div xmlns="http://www.w3.org/1999/xhtml"></div>`). The query-supporting
server of XPath1 or CSS3 queries MUST also support the ability to recognize
an additional client-supplied header, **query-format** set to the value
`json` which will deliver the XML or HTML results in the JSON format
while also recognizing the header **query-content-type** which will
indicate the content-type of the wrapped fragments (i.e., text/html or an
XML MIME type) as distinct from the regular **Content-Type** header
which for JSON should be `application/json`.
5. The query-supporting server for CSS3 queries MUST support two extensions
described below for obtaining an attribute value or text nodes. In such
cases, the format will be a string. The query-supporting server of such
queries MUST also support the ability to recognize an additional
client-supplied header, **query-format** set to the value `json` so as to
deliver the string in JSON format. A **query-content-type** response header
MAY be provided if set to `text/plain`. (Headers may be added in the future
to distinguish whether JSON delivery should concatenate text node results
into a single string or not.)

@@ -106,11 +159,12 @@ ## CSS Selector modifications

* **attr(...)** - Grab the actual attribute content (of the first attribute
- **attr(...)** - Grab the actual attribute content (of the first attribute
in the node set). This is necessary since attribute selectors are used
in CSS to target elements rather than attributes.
* **text()** - Grab the text nodes within the node set
- **text()** - Grab the text nodes within the node set
## Comparison with OData
HTTP Query is a much lighter protocol. HTTP Query does hope to eventually support modification as does OData,
but in a web-friendly, hierarchical manner such as with https://github.com/kriszyp/put-selector.
HTTP Query is a much lighter protocol. HTTP Query does hope to eventually
support modification as does OData, but in a web-friendly, hierarchical manner
such as with <https://github.com/kriszyp/put-selector>.

@@ -121,64 +175,158 @@ (INCOMPLETE)

1. Support **[JSONiq](https://www.jsoniq.org/)** (XQuery-like power for JSON)
1. Support **XQuery** (via
[fontoxpath](https://github.com/FontoXML/fontoxpath)?)
1. **Restore add-on** (as webextension) so can make queries and joins even to
non-httpquery sites!
1. i18n-ize
1. Add tests (especially ensuring content-type works properly with each mode)
1. Add an Ajax site-independent web application, including ability to supply arbitrary URLs with cross-site headers or making AsYouWish requests (would be facilitated by https://bugzilla.mozilla.org/show_bug.cgi?id=880908 ; see also https://bugzilla.mozilla.org/show_bug.cgi?id=855936 )
* Do demos against HTML tables, HTML Microdata, TEI (XML)
1. Add an Ajax site-independent web application, including ability to supply
arbitrary URLs with cross-site headers or making AsYouWish requests (would
be facilitated by <https://bugzilla.mozilla.org/show_bug.cgi?id=880908>; see
also <https://bugzilla.mozilla.org/show_bug.cgi?id=855936>)
- Do demos against HTML tables, HTML Microdata, TEI (XML)
1. Server todos:
* Make the Node.js implementation wrappable for use with other existing dynamic code.
* Make the PHP implementation more easily wrappable for use with dynamic code.
* Contemplate what error/code to return (instead of full text) if user submits query header but not supported
* Get XPath to work with HTML DOM and get CSS Selectors to work with XML (if it cannot already)?); test on (Linux) environment with jsdom
* Fix local xpath query "//a/text()" or "//a/@href" (ORDERED_NODE_SNAPSHOT_TYPE === 7 is ok with arrays but not these apparently)
* Allow CSS3 :text() nodes to be returned as an array of nodes for JSON (local and remote); allow explicit :html() ?
* Get server to resolve new HTML Includes (or XInclude's) (and entities?) server-side before performing queries
* Support by cross-domain access by default (since presence of headers already implies at least some flexibility in querying)?
* Ability to send Relative XPaths (or CSS Sel.), so if file really big, can start at a certain point
* Store user access in simple text file and use to check along with BrowserID (not related to protocol but another "powerful-by-default" feature)
* Tool to auto-generate XML schema for SQL database table along with a single raw `<table>` export URL (but only enabling downloading
within limits (see limits below); XPath/CSS Selectors (or paginating query mechanism, etc.) can then be translated back into equivalent SQL.
- Make the Node.js implementation wrappable for use with other existing
dynamic code.
- Make the PHP implementation more easily wrappable for use with dynamic
code.
- Contemplate what error/code to return (instead of full text) if user
submits query header but not supported
- Get XPath to work with HTML DOM and get CSS Selectors to work with XML
(if it cannot already)?; test on (Linux) environment with jsdom
- Fix local xpath query "//a/text()" or "//a/@href"
(ORDERED_NODE_SNAPSHOT_TYPE === 7 is ok with arrays but not these
apparently)
- Allow CSS3 :text() nodes to be returned as an array of nodes for JSON
(local and remote); allow explicit :html() ?
- Get server to resolve new HTML Includes (or XInclude's) (and entities?)
server-side before performing queries
- Support cross-domain access by default (since presence of headers
already implies at least some flexibility in querying)?
- Ability to send Relative XPaths (or CSS Sel.), so if file really big,
can start at a certain point
- Store user access in simple text file and use to check along with
BrowserID (not related to protocol but another "powerful-by-default"
feature)
- Tool to auto-generate XML schema for SQL database table along with a
single raw `<table>` export URL (but only enabling downloading within
limits (see limits below)); XPath/CSS Selectors (or paginating query
mechanism, etc.) can then be translated back into equivalent SQL.
1. Add-on todos:
* Confirm why queries aren't working for some sites and respond accordingly? (e.g., Yahoo and StackOverflow are detecting automatic Ajax header?)
* Allow JSON format to be displayed as actual application/json content-type (and XML as application/xml)
* Query input
* XPath (or CSS Sel.) syntax coloring? (also update regex coloring for CodeMirror!)
* XPath (or CSS Sel.) with auto-complete based on header-associated schema (including for HTML-treated-as-XML?) or at least general awareness of language/content-type (HTML/XML)
* Page-specific preferences on whether to send appropriate headers to load HTTPQuery-supporting sites as empty (or full) by default (instead of possible Ajax pagination by the server); selectively advertise support headers (or at least minimize types on which the "http-on-modify-request" header is sent)?
- Confirm why queries aren't working for some sites and respond
accordingly? (e.g., Yahoo and StackOverflow are detecting automatic
Ajax header?)
- Allow JSON format to be displayed as actual application/json
content-type (and XML as application/xml)
- Query input
- XPath (or CSS Sel.) syntax coloring? (also update regex coloring for
CodeMirror!)
- XPath (or CSS Sel.) with auto-complete based on header-associated
schema (including for HTML-treated-as-XML?) or at least general
awareness of language/content-type (HTML/XML)
- Page-specific preferences on whether to send appropriate headers to
load HTTPQuery-supporting sites as empty (or full) by default (instead
of possible Ajax pagination by the server); selectively advertise
support headers (or at least minimize types on which the
"http-on-modify-request" header is sent)?
1. Protocol enhancements
* JSON support (via JSONPath / RQL?)
* Schema attachment/markup enhancements for intelligent, type-aware, paginated, offlineable widgets:
* Schema attachment (or markup) used by browser (or server) to make suitable query interface
* Server indicates header-specified RelaxNG, Schematron for starters, and browser delivers simultaneously with content if possible
* Schema-awareness by browser to transform current document into queryable doc could work even if doc only partly loaded (or offline)
* Types:
- JSON support (via JSONPath / RQL?)
- Schema attachment/markup enhancements for intelligent, type-aware,
paginated, offlineable widgets:
- Schema attachment (or markup) used by browser (or server) to make
suitable query interface
- Server indicates header-specified RelaxNG, Schematron for
starters, and browser delivers simultaneously with content
if possible
- Schema-awareness by browser to transform current document into
queryable doc could work even if doc only partly loaded (or
offline)
- Types:
1. Tables
* Browser displays the requested data inline with already-loaded data, or as requested by user (for file download, separate dialog, etc.?)
* Allow mashable plugins, e.g., for user providing their own Excel-like automated columns (e.g., if user wanted all tables to allow a given column's data to be translated word-for-word and added as a new column)
- Browser displays the requested data inline with
already-loaded data, or as requested by user (for file
download, separate dialog, etc.?)
- Allow mashable plugins, e.g., for user providing their
own Excel-like automated columns (e.g., if user wanted
all tables to allow a given column's data to be
translated word-for-word and added as a new column)
1. Lists
* Hierarchical drill-down for browsing and search; also as requested by user (for file download, separate dialog, etc.?)
- Hierarchical drill-down for browsing and search; also
as requested by user (for file download, separate
dialog, etc.?)
1. Numbered paragraphs
* Detect paragraph elements within a file and auto-number them (or use an attribute)--e.g., TEI's `<p n="">` for an automatic
- Detect paragraph elements within a file and auto-number
them (or use an attribute)--e.g., TEI's `<p n="">` for
an automatic
paragraph range selection interface
1. Arbitrary but type-aware queries (e.g., use a date selector for finding all dates within a range (of any element or a given element anywhere in the document)
* Allow both browser-side and server-side overlays (strip at least some markup server-side if handling server-side so client doesn't try to redo); might use headers to detect whether to let user use their own browser-supplied one or some Ajax-based, simulating widget; use Custom Elements?
* Web-based IDE (WIDE) to integrate with CKEditor/CodeMirror allowing inline querying and modification of data for a given large document without needing to load it all into the IDE view unless desired. Schema-driven input could also facilitate more common use of schemas with the query protocol (e.g., the schema for RelaxNG or Schematron could provide auto-complete or XSL on a schema could build a form for input).
* WYSIWYG table editor to allow adding of types (as well as max, starting point, etc.), so average users can create databases (and schematic info) easily in HTML
* Some kind of auto-update mechanism for offline storage? (OData ideas?)
* Limits
* Client-side size limits - e.g., normally download full load for offline caching (a particular site?) unless over 200 MB/table, etc.
* Server indicates support limitations (e. g., size limits, max rows/request (page) for tables, lists, etc.) and server ignores if user disregards
* Allow server (or browser?) to read header or markup provided XPointers to find only specific elements supporting querying/pagination, etc. and with their limits
* Possible default behaviors: avoid resolving includes?, default row count/size per server, or page-specified suggestions for partial loading and query points)
* If the HTML is already database-generated, the server could use its own default number of rows/records/size
* Offline
* Coordinate full (or even partial) delivery for offline caching and querying (with automatic detection of offline mode, but also option to query offline even if online)
* Ensure offline storage works with data added after (and before) page load
* Add-on to allow any page stored for offline use (and cached in user-selected collections); ensure one can also store results when making selective queries
* (XQuery/XSL/XProc or) jQuery-like syntax for more developer or user-driven complex, server-side reshaping (along with XPath/CSS Selectors) including mashups, though this presents even more challenges re: security
* Include ability to include & mix other sources declaratively yet query together - e.g., protocol to send current doc to XSL as param to show automated cols
* Allow data modification, e.g., something friendly like https://github.com/kriszyp/put-selector
* Create corresponding bookmarkable/shareable protocol (e.g., `query:`) to request and reshape foreign sites with user permission
* Integrate into privileged AsYouWish HTML pages
* Add jQuery-like syntax option into add-on dialog with option to save as ayw-HTML (or create HTML content-type based on JS alone without <script></script>) (and then do for my own JML HTML-as-JSON content-type)
* Other related protocols
* Implement a related file search protocol to search all files in a folder, etc. (On the desktop, see an analogous proposal for Firefox desktop search, at https://bugzilla.mozilla.org/show_bug.cgi?id=878626 . Implement via Gopher (or METS)-like protocol? Check for <link/> to advertise support and thereby show interface?
* Consider headers/protocols where you can get just what you want (e.g., Gopher, XMPP Data Forms), but with option for author to surround with arbitrary HTML
1. Arbitrary but type-aware queries (e.g., use a date selector
for finding all dates within a range (of any element or
a given element anywhere in the document))
- Allow both browser-side and server-side overlays (strip at
least some markup server-side if handling server-side so client
doesn't try to redo); might use headers to detect whether to
let user use their own browser-supplied one or some Ajax-based,
simulating widget; use Custom Elements?
- Web-based IDE (WIDE) to integrate with CKEditor/CodeMirror
allowing inline querying and modification of data for a given
large document without needing to load it all into the IDE view
unless desired. Schema-driven input could also facilitate more
common use of schemas with the query protocol (e.g., the schema
for RelaxNG or Schematron could provide auto-complete or XSL on
a schema could build a form for input).
- WYSIWYG table editor to allow adding of types (as well as
max, starting point, etc.), so average users can create
databases (and schematic info) easily in HTML
- Some kind of auto-update mechanism for offline storage? (OData
ideas?)
- Limits
- Client-side size limits - e.g., normally download full load
for offline caching (a particular site?) unless over
200 MB/table, etc.
- Server indicates support limitations (e.g., size limits,
max rows/request (page) for tables, lists, etc.) and server
ignores if user disregards
- Allow server (or browser?) to read header or markup
provided XPointers to find only specific elements
supporting querying/pagination, etc. and with their
limits
- Possible default behaviors: avoid resolving includes?,
default row count/size per server, or page-specified
suggestions for partial loading and query points
- If the HTML is already database-generated, the server
could use its own default number of rows/records/size
- Offline
- Coordinate full (or even partial) delivery for offline
caching and querying (with automatic detection of offline
mode, but also option to query offline even if online)
- Ensure offline storage works with data added after (and
before) page load
- Add-on to allow any page stored for offline use (and cached
in user-selected collections); ensure one can also store
results when making selective queries
- (XQuery/XSL/XProc or) jQuery-like syntax for more developer or
user-driven complex, server-side reshaping (along with XPath/CSS
Selectors) including mashups, though this presents even more challenges
re: security
- Include ability to include & mix other sources declaratively yet
query together - e.g., protocol to send current doc to XSL as
param to show automated cols
- Allow data modification, e.g., something friendly like
<https://github.com/kriszyp/put-selector>
- Create corresponding bookmarkable/shareable protocol (e.g., `query:`) to
request and reshape foreign sites with user permission
- Integrate into privileged AsYouWish HTML pages
- Add jQuery-like syntax option into add-on dialog with option to save
as ayw-HTML (or create HTML content-type based on JS alone without
`<script></script>`) (and then do for my own JML HTML-as-JSON
content-type)
- Other related protocols
- Implement a related file search protocol to search all files in a
folder, etc. (On the desktop, see an analogous proposal for Firefox
desktop search, at
<https://bugzilla.mozilla.org/show_bug.cgi?id=878626>.) Implement via
Gopher (or METS)-like protocol? Check for `<link/>` to advertise
support and thereby show interface?
- Consider headers/protocols where you can get just what you want
(e.g., Gopher, XMPP Data Forms), but with option for author to
surround with arbitrary HTML
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc