@uwdata/mosaic-core
Advanced tools
Comparing version 0.10.0 to 0.11.0
{ | ||
"name": "@uwdata/mosaic-core", | ||
"version": "0.10.0", | ||
"version": "0.11.0", | ||
"description": "Scalable and extensible linked data views.", | ||
@@ -27,14 +27,14 @@ "keywords": [ | ||
"lint": "eslint src test", | ||
"test": "mocha 'test/**/*-test.js'", | ||
"test": "vitest run --dangerouslyIgnoreUnhandledErrors", | ||
"prepublishOnly": "npm run test && npm run lint && npm run build" | ||
}, | ||
"dependencies": { | ||
"@duckdb/duckdb-wasm": "^1.28.1-dev232.0", | ||
"@uwdata/mosaic-sql": "^0.10.0", | ||
"apache-arrow": "^16.1.0" | ||
"@duckdb/duckdb-wasm": "^1.28.1-dev278.0", | ||
"@uwdata/flechette": "^1.0.2", | ||
"@uwdata/mosaic-sql": "^0.11.0" | ||
}, | ||
"devDependencies": { | ||
"@uwdata/mosaic-duckdb": "^0.10.0" | ||
"@uwdata/mosaic-duckdb": "^0.11.0" | ||
}, | ||
"gitHead": "94fc4f0d4efc622001f6afd6714d1e9dda745be2" | ||
"gitHead": "861d616f39926a1d2aee83b59dbdd70b0b3caf12" | ||
} |
@@ -1,2 +0,2 @@ | ||
import { tableFromIPC } from 'apache-arrow'; | ||
import { decodeIPC } from '../util/decode-ipc.js'; | ||
@@ -8,3 +8,3 @@ export function restConnector(uri = 'http://localhost:3000/') { | ||
* @param {object} query | ||
* @param {'exec' | 'arrow' | 'json'} [query.type] The query type: 'exec', 'arrow', or 'json'. | ||
* @param {'exec' | 'arrow' | 'json' | 'create-bundle' | 'load-bundle'} [query.type] The query type. | ||
* @param {string} query.sql A SQL query string. | ||
@@ -24,3 +24,3 @@ * @returns the query result | ||
return query.type === 'exec' ? req | ||
: query.type === 'arrow' ? tableFromIPC(req) | ||
: query.type === 'arrow' ? decodeIPC(await (await req).arrayBuffer()) | ||
: (await req).json(); | ||
@@ -27,0 +27,0 @@ } |
@@ -1,2 +0,2 @@ | ||
import { tableFromIPC } from 'apache-arrow'; | ||
import { decodeIPC } from '../util/decode-ipc.js'; | ||
@@ -50,3 +50,3 @@ export function socketConnector(uri = 'ws://localhost:3000/') { | ||
} else if (query.type === 'arrow') { | ||
resolve(tableFromIPC(data.arrayBuffer())); | ||
resolve(decodeIPC(data)); | ||
} else { | ||
@@ -63,2 +63,3 @@ throw new Error(`Unexpected socket data: ${data}`); | ||
ws = new WebSocket(uri); | ||
ws.binaryType = 'arraybuffer'; | ||
for (const type in events) { | ||
@@ -89,3 +90,3 @@ ws.addEventListener(type, events[type]); | ||
* @param {object} query | ||
* @param {'exec' | 'arrow' | 'json'} [query.type] The query type: 'exec', 'arrow', or 'json'. | ||
* @param {'exec' | 'arrow' | 'json' | 'create-bundle' | 'load-bundle'} [query.type] The query type. | ||
* @param {string} query.sql A SQL query string. | ||
@@ -92,0 +93,0 @@ * @returns the query result |
import * as duckdb from '@duckdb/duckdb-wasm'; | ||
import { decodeIPC } from '../util/decode-ipc.js'; | ||
// bypass duckdb-wasm query method to get Arrow IPC bytes directly | ||
// https://github.com/duckdb/duckdb-wasm/issues/267#issuecomment-2252749509 | ||
function getArrowIPC(con, query) { | ||
return new Promise((resolve, reject) => { | ||
con.useUnsafe(async (bindings, conn) => { | ||
try { | ||
const buffer = await bindings.runQuery(conn, query); | ||
resolve(buffer); | ||
} catch (error) { | ||
reject(error); | ||
} | ||
}); | ||
}); | ||
} | ||
export function wasmConnector(options = {}) { | ||
@@ -48,3 +64,3 @@ const { duckdb, connection, ...opts } = options; | ||
* @param {object} query | ||
* @param {'exec' | 'arrow' | 'json'} [query.type] The query type: 'exec', 'arrow', or 'json'. | ||
* @param {'exec' | 'arrow' | 'json' | 'create-bundle' | 'load-bundle'} [query.type] The query type. | ||
* @param {string} query.sql A SQL query string. | ||
@@ -56,6 +72,6 @@ * @returns the query result | ||
const con = await getConnection(); | ||
const result = await con.query(sql); | ||
const result = await getArrowIPC(con, sql); | ||
return type === 'exec' ? undefined | ||
: type === 'arrow' ? result | ||
: result.toArray(); | ||
: type === 'arrow' ? decodeIPC(result) | ||
: decodeIPC(result).toArray(); | ||
} | ||
@@ -62,0 +78,0 @@ }; |
import { socketConnector } from './connectors/socket.js'; | ||
import { DataCubeIndexer } from './DataCubeIndexer.js'; | ||
import { MosaicClient } from './MosaicClient.js'; | ||
import { QueryManager, Priority } from './QueryManager.js'; | ||
import { queryFieldInfo } from './util/field-info.js'; | ||
import { QueryResult } from './util/query-result.js'; | ||
import { voidLogger } from './util/void-logger.js'; | ||
@@ -28,2 +30,6 @@ | ||
/** | ||
* @typedef {import('@uwdata/mosaic-sql').Query | string} QueryType | ||
*/ | ||
/** | ||
* A Mosaic Coordinator manages all database communication for clients and | ||
@@ -38,3 +44,4 @@ * handles selection updates. The Coordinator also performs optimizations | ||
* @param {boolean} [options.consolidate=true] Boolean flag to enable/disable query consolidation. | ||
* @param {object} [options.indexes] Data cube indexer options. | ||
* @param {import('./DataCubeIndexer.js').DataCubeIndexerOptions} [options.indexes] | ||
* Data cube indexer options. | ||
*/ | ||
@@ -49,9 +56,10 @@ export class Coordinator { | ||
} = {}) { | ||
/** @type {QueryManager} */ | ||
this.manager = manager; | ||
this.manager.cache(cache); | ||
this.manager.consolidate(consolidate); | ||
this.dataCubeIndexer = new DataCubeIndexer(this, indexes); | ||
this.databaseConnector(db); | ||
this.logger(logger); | ||
this.databaseConnector(db); | ||
this.clear(); | ||
this.dataCubeIndexer = new DataCubeIndexer(this, indexes); | ||
} | ||
@@ -103,3 +111,3 @@ | ||
* canceled if they are queued but have not yet been submitted. | ||
* @param {import('./util/query-result.js').QueryResult[]} requests An array | ||
* @param {QueryResult[]} requests An array | ||
* of query result objects, such as those returned by the `query` method. | ||
@@ -113,11 +121,12 @@ */ | ||
* Issue a query for which no result (return value) is needed. | ||
* @param {import('@uwdata/mosaic-sql').Query | string} query The query. | ||
* @param {QueryType | QueryType[]} query The query or an array of queries. | ||
* Each query should be either a Query builder object or a SQL string. | ||
* @param {object} [options] An options object. | ||
* @param {number} [options.priority] The query priority, defaults to | ||
* `Priority.Normal`. | ||
* @returns {import('./util/query-result.js').QueryResult} A query result | ||
* @returns {QueryResult} A query result | ||
* promise. | ||
*/ | ||
exec(query, { priority = Priority.Normal } = {}) { | ||
query = Array.isArray(query) ? query.join(';\n') : query; | ||
query = Array.isArray(query) ? query.filter(x => x).join(';\n') : query; | ||
return this.manager.request({ type: 'exec', query }, priority); | ||
@@ -129,3 +138,4 @@ } | ||
* consolidate with other queries and its results may be cached. | ||
* @param {import('@uwdata/mosaic-sql').Query | string} query The query. | ||
* @param {QueryType} query The query as either a Query builder object | ||
* or a SQL string. | ||
* @param {object} [options] An options object. | ||
@@ -136,4 +146,3 @@ * @param {'arrow' | 'json'} [options.type] The query result format type. | ||
* `Priority.Normal`. | ||
* @returns {import('./util/query-result.js').QueryResult} A query result | ||
* promise. | ||
* @returns {QueryResult} A query result promise. | ||
*/ | ||
@@ -152,7 +161,7 @@ query(query, { | ||
* for efficient future access. | ||
* @param {import('@uwdata/mosaic-sql').Query | string} query The query. | ||
* @param {QueryType} query The query as either a Query builder object | ||
* or a SQL string. | ||
* @param {object} [options] An options object. | ||
* @param {'arrow' | 'json'} [options.type] The query result format type. | ||
* @returns {import('./util/query-result.js').QueryResult} A query result | ||
* promise. | ||
* @returns {QueryResult} A query result promise. | ||
*/ | ||
@@ -163,7 +172,21 @@ prefetch(query, options = {}) { | ||
/** | ||
* Create a bundle of queries that can be loaded into the cache. | ||
* | ||
* @param {string} name The name of the bundle. | ||
* @param {[string | {sql: string}, {alias: string}]} queries The queries to save into the bundle. | ||
* @param {number} priority Request priority. | ||
* @returns {QueryResult} A query result promise. | ||
*/ | ||
createBundle(name, queries, priority = Priority.Low) { | ||
const options = { name, queries }; | ||
const options = { name, queries: queries.map(q => typeof q == 'string' ? {sql: q} : q) }; | ||
return this.manager.request({ type: 'create-bundle', options }, priority); | ||
} | ||
/** | ||
* Load a bundle into the cache. | ||
* @param {string} name The name of the bundle. | ||
* @param {number} priority Request priority. | ||
* @returns {QueryResult} A query result promise. | ||
*/ | ||
loadBundle(name, priority = Priority.High) { | ||
@@ -179,4 +202,4 @@ const options = { name }; | ||
* data (or error) to the client. | ||
* @param {import('./MosaicClient.js').MosaicClient} client A Mosaic client. | ||
* @param {import('@uwdata/mosaic-sql').Query | string} query The data query. | ||
* @param {MosaicClient} client A Mosaic client. | ||
* @param {QueryType} query The data query. | ||
* @param {number} [priority] The query priority. | ||
@@ -199,6 +222,4 @@ * @returns {Promise} A Promise that resolves upon completion of the update. | ||
* side effect, this method clears the current data cube indexer state. | ||
* @param {import('./MosaicClient.js').MosaicClient} client The client | ||
* to update. | ||
* @param {import('@uwdata/mosaic-sql').Query | string | null} [query] | ||
* The query to issue. | ||
* @param {MosaicClient} client The client to update. | ||
* @param {QueryType | null} [query] The query to issue. | ||
*/ | ||
@@ -209,3 +230,3 @@ requestQuery(client, query) { | ||
? this.updateClient(client, query) | ||
: client.update(); | ||
: Promise.resolve(client.update()); | ||
} | ||
@@ -215,4 +236,3 @@ | ||
* Connect a client to the coordinator. | ||
* @param {import('./MosaicClient.js').MosaicClient} client The Mosaic | ||
* client to connect. | ||
* @param {MosaicClient} client The Mosaic client to connect. | ||
*/ | ||
@@ -228,2 +248,10 @@ async connect(client) { | ||
// initialize client lifecycle | ||
this.initializeClient(client); | ||
// connect filter selection | ||
connectSelection(this, client.filterBy, client); | ||
} | ||
async initializeClient(client) { | ||
// retrieve field statistics | ||
@@ -235,6 +263,4 @@ const fields = client.fields(); | ||
// connect filter selection | ||
connectSelection(this, client.filterBy, client); | ||
client.requestQuery(); | ||
// request data query | ||
return client.requestQuery(); | ||
} | ||
@@ -244,4 +270,3 @@ | ||
* Disconnect a client from the coordinator. | ||
* @param {import('./MosaicClient.js').MosaicClient} client The Mosaic | ||
* client to disconnect. | ||
* @param {MosaicClient} client The Mosaic client to disconnect. | ||
*/ | ||
@@ -265,4 +290,4 @@ disconnect(client) { | ||
* @param {import('./Selection.js').Selection} selection A selection. | ||
* @param {import('./MosaicClient.js').MosaicClient} client A Mosiac | ||
* client that is filtered by the given selection. | ||
* @param {MosaicClient} client A Mosiac client that is filtered by the | ||
* given selection. | ||
*/ | ||
@@ -269,0 +294,0 @@ function connectSelection(mc, selection, client) { |
@@ -10,2 +10,10 @@ import { | ||
/** | ||
* @typedef {object} DataCubeIndexerOptions | ||
* @property {string} [schema] Database schema (namespace) in which to write | ||
* data cube index tables (default 'mosaic'). | ||
* @property {boolean} [options.enabled=true] Flag to enable or disable the | ||
* indexer. This setting can later be updated via the `enabled` method. | ||
*/ | ||
/** | ||
* Build and query optimized indices ("data cubes") for fast computation of | ||
@@ -16,5 +24,13 @@ * groupby aggregate queries over compatible client queries and selections. | ||
* realized as as database tables that can be queried for rapid updates. | ||
* | ||
* Compatible client queries must consist of only groupby dimensions and | ||
* supported aggregate functions. Compatible selections must contain an active | ||
* clause that exposes metadata for an interval or point value predicate. | ||
* | ||
* Data cube index tables are written to a dedicated schema (namespace) that | ||
* can be set using the *schema* constructor option. This schema acts as a | ||
* persistent cache, and index tables may be used across sessions. The | ||
* `dropIndexTables` method issues a query to remove *all* tables within | ||
* this schema. This may be needed if the original tables have updated data, | ||
* but should be used with care. | ||
*/ | ||
@@ -25,10 +41,7 @@ export class DataCubeIndexer { | ||
* @param {import('./Coordinator.js').Coordinator} coordinator A Mosaic coordinator. | ||
* @param {object} [options] Indexer options. | ||
* @param {boolean} [options.enabled=true] Flag to enable/disable indexer. | ||
* @param {boolean} [options.temp=true] Flag to indicate if generated data | ||
* cube index tables should be temporary tables. | ||
* @param {DataCubeIndexerOptions} [options] Data cube indexer options. | ||
*/ | ||
constructor(coordinator, { | ||
enabled = true, | ||
temp = true | ||
schema = 'mosaic', | ||
enabled = true | ||
} = {}) { | ||
@@ -38,4 +51,4 @@ /** @type {Map<import('./MosaicClient.js').MosaicClient, DataCubeInfo | Skip | null>} */ | ||
this.active = null; | ||
this.temp = temp; | ||
this.mc = coordinator; | ||
this._schema = schema; | ||
this._enabled = enabled; | ||
@@ -45,10 +58,9 @@ } | ||
/** | ||
* Set the enabled state of this indexer. If false, any cached state is | ||
* Set the enabled state of this indexer. If false, any local state is | ||
* cleared and subsequent index calls will return null until re-enabled. | ||
* @param {boolean} state The enabled state. | ||
* This method has no effect on any index tables already in the database. | ||
* @param {boolean} [state] The enabled state to set. | ||
*/ | ||
enabled(state) { | ||
if (state === undefined) { | ||
return this._enabled; | ||
} else if (this._enabled !== state) { | ||
set enabled(state) { | ||
if (this._enabled !== state) { | ||
if (!state) this.clear(); | ||
@@ -60,9 +72,52 @@ this._enabled = state; | ||
/** | ||
* Get the enabled state of this indexer. | ||
* @returns {boolean} The current enabled state. | ||
*/ | ||
get enabled() { | ||
return this._enabled; | ||
} | ||
/** | ||
* Set the database schema used by this indexer. Upon changes, any local | ||
* state is cleared. This method does _not_ drop any existing data cube | ||
* tables, use `dropIndexTables` before changing the schema to also remove | ||
* existing index tables in the database. | ||
* @param {string} [schema] The schema name to set. | ||
*/ | ||
set schema(schema) { | ||
if (this._schema !== schema) { | ||
this.clear(); | ||
this._schema = schema; | ||
} | ||
} | ||
/** | ||
* Get the database schema used by this indexer. | ||
* @returns {string} The current schema name. | ||
*/ | ||
get schema() { | ||
return this._schema; | ||
} | ||
/** | ||
* Issues a query through the coordinator to drop the current index table | ||
* schema. *All* tables in the schema will be removed and local state is | ||
* cleared. Call this method if the underlying base tables have been updated, | ||
* causing derived index tables to become stale and inaccurate. Use this | ||
* method with care! Once dropped, the schema will be repopulated by future | ||
* data cube indexer requests. | ||
* @returns A query result promise. | ||
*/ | ||
dropIndexTables() { | ||
this.clear(); | ||
return this.mc.exec(`DROP SCHEMA IF EXISTS "${this.schema}" CASCADE`); | ||
} | ||
/** | ||
* Clear the cache of data cube index table entries for the current active | ||
* selection clause. This method will also cancel any queued data cube table | ||
* creation queries that have not yet been submitted to the database. This | ||
* method does _not_ drop any existing data cube tables. | ||
* selection clause. This method does _not_ drop any existing data cube | ||
* tables. Use `dropIndexTables` to remove existing index tables from the | ||
* database. | ||
*/ | ||
clear() { | ||
this.mc.cancel(Array.from(this.indexes.values(), info => info?.result)); | ||
this.indexes.clear(); | ||
@@ -88,5 +143,5 @@ this.active = null; | ||
// if not enabled, do nothing | ||
if (!this._enabled) return null; | ||
if (!this.enabled) return null; | ||
const { indexes, mc, temp } = this; | ||
const { indexes, mc, schema } = this; | ||
const { source } = activeClause; | ||
@@ -137,4 +192,7 @@ | ||
const filter = selection.remove(source).predicate(client); | ||
info = dataCubeInfo(client.query(filter), active, indexCols); | ||
info.result = mc.exec(create(info.table, info.create, { temp })); | ||
info = dataCubeInfo(client.query(filter), active, indexCols, schema); | ||
info.result = mc.exec([ | ||
`CREATE SCHEMA IF NOT EXISTS ${schema}`, | ||
create(info.table, info.create, { temp: false }) | ||
]); | ||
info.result.catch(e => mc.logger().error(e)); | ||
@@ -236,3 +294,3 @@ } | ||
*/ | ||
function dataCubeInfo(clientQuery, active, indexCols) { | ||
function dataCubeInfo(clientQuery, active, indexCols, schema) { | ||
const { dims, aggr, aux } = indexCols; | ||
@@ -260,3 +318,3 @@ const { columns } = active; | ||
const id = (fnv_hash(create) >>> 0).toString(16); | ||
const table = `cube_index_${id}`; | ||
const table = `${schema}.cube_${id}`; | ||
@@ -270,3 +328,3 @@ // generate data cube select query | ||
return new DataCubeInfo({ table, create, active, select }); | ||
return new DataCubeInfo({ id, table, create, active, select }); | ||
} | ||
@@ -273,0 +331,0 @@ |
@@ -19,12 +19,7 @@ export { MosaicClient } from './MosaicClient.js'; | ||
export { | ||
isArrowTable, | ||
convertArrowArrayType, | ||
convertArrowValue, | ||
convertArrowColumn | ||
} from './util/convert-arrow.js' | ||
export { decodeIPC } from './util/decode-ipc.js'; | ||
export { distinct } from './util/distinct.js'; | ||
export { isArrowTable } from './util/is-arrow-table.js'; | ||
export { synchronizer } from './util/synchronizer.js'; | ||
export { throttle } from './util/throttle.js'; | ||
export { toDataColumns } from './util/to-data-columns.js' | ||
export { toDataColumns } from './util/to-data-columns.js'; |
@@ -106,2 +106,3 @@ import { throttle } from './util/throttle.js'; | ||
* be called, filtered by the current filterBy selection. | ||
* @returns {Promise} | ||
*/ | ||
@@ -124,2 +125,10 @@ requestQuery(query) { | ||
/** | ||
* Reset this client, initiating new field info and query requests. | ||
* @returns {Promise} | ||
*/ | ||
initialize() { | ||
return this._coordinator.initializeClient(this); | ||
} | ||
/** | ||
* Requests a client update. | ||
@@ -126,0 +135,0 @@ * For example to (re-)render an interface component. |
@@ -111,2 +111,8 @@ import { Query, Ref, isDescribeQuery } from '@uwdata/mosaic-sql'; | ||
} | ||
// @ts-ignore | ||
else if (query.select().some(({ expr }) => expr.aggregate)) { | ||
// if query is an ungrouped aggregate, add an explicit groupby to | ||
// prevent improper consolidation with non-aggregate queries | ||
q.$groupby('ALL'); | ||
} | ||
@@ -248,12 +254,9 @@ // key is just the transformed query as SQL | ||
* Project a consolidated result to a client result | ||
* @param {*} data Consolidated query result, as an Apache Arrow Table | ||
* @param {*} map Column name map as [source, target] pairs | ||
* @param {import('@uwdata/flechette').Table} data | ||
* Consolidated query result, as an Arrow Table | ||
* @param {[string, string][]} map Column name map as [source, target] pairs | ||
* @returns the projected Apache Arrow table | ||
*/ | ||
function projectResult(data, map) { | ||
const cols = {}; | ||
for (const [name, as] of map) { | ||
cols[as] = data.getChild(name); | ||
} | ||
return new data.constructor(cols); | ||
return data.select(map.map(x => x[0]), map.map(x => x[1])); | ||
} | ||
@@ -263,4 +266,5 @@ | ||
* Filter a consolidated describe query result to a client result | ||
* @param {*} data Consolidated query result | ||
* @param {*} map Column name map as [source, target] pairs | ||
* @param {import('@uwdata/flechette').Table} data | ||
* Consolidated query result, as an Arrow Table | ||
* @param {[string, string][]} map Column name map as [source, target] pairs | ||
* @returns the filtered table data | ||
@@ -267,0 +271,0 @@ */ |
import { consolidator } from './QueryConsolidator.js'; | ||
import { lruCache, voidCache } from './util/cache.js'; | ||
import { priorityQueue } from './util/priority-queue.js'; | ||
import { PriorityQueue } from './util/priority-queue.js'; | ||
import { QueryResult } from './util/query-result.js'; | ||
@@ -10,3 +10,3 @@ | ||
constructor() { | ||
this.queue = priorityQueue(3); | ||
this.queue = new PriorityQueue(3); | ||
this.db = null; | ||
@@ -113,3 +113,9 @@ this.clientCache = null; | ||
if (set.size) { | ||
this.queue.remove(({ result }) => set.has(result)); | ||
this.queue.remove(({ result }) => { | ||
if (set.has(result)) { | ||
result.reject('Canceled'); | ||
return true; | ||
} | ||
return false; | ||
}); | ||
} | ||
@@ -116,0 +122,0 @@ } |
@@ -13,2 +13,9 @@ import { or } from '@uwdata/mosaic-sql'; | ||
function create(options, include) { | ||
return new Selection( | ||
new SelectionResolver(options), | ||
include ? [include].flat() : include | ||
); | ||
} | ||
/** | ||
@@ -29,6 +36,9 @@ * Represents a dynamic set of query filter predicates. | ||
* setting determines the default selection state. | ||
* @param {Selection|Selection[]} [options.include] Upstream selections whose | ||
* clauses should be included as part of the new selection. Any clauses | ||
* published to upstream selections will be relayed to the new selection. | ||
* @returns {Selection} The new Selection instance. | ||
*/ | ||
static intersect({ cross = false, empty = false } = {}) { | ||
return new Selection(new SelectionResolver({ cross, empty })); | ||
static intersect({ cross = false, empty = false, include = [] } = {}) { | ||
return create({ cross, empty }, include); | ||
} | ||
@@ -46,6 +56,9 @@ | ||
* setting determines the default selection state. | ||
* @param {Selection|Selection[]} [options.include] Upstream selections whose | ||
* clauses should be included as part of the new selection. Any clauses | ||
* published to upstream selections will be relayed to the new selection. | ||
* @returns {Selection} The new Selection instance. | ||
*/ | ||
static union({ cross = false, empty = false } = {}) { | ||
return new Selection(new SelectionResolver({ cross, empty, union: true })); | ||
static union({ cross = false, empty = false, include = [] } = {}) { | ||
return create({ cross, empty, union: true }, include); | ||
} | ||
@@ -63,6 +76,9 @@ | ||
* setting determines the default selection state. | ||
* @param {Selection|Selection[]} [options.include] Upstream selections whose | ||
* clauses should be included as part of the new selection. Any clauses | ||
* published to upstream selections will be relayed to the new selection. | ||
* @returns {Selection} The new Selection instance. | ||
*/ | ||
static single({ cross = false, empty = false } = {}) { | ||
return new Selection(new SelectionResolver({ cross, empty, single: true })); | ||
static single({ cross = false, empty = false, include = [] } = {}) { | ||
return create({ cross, empty, single: true }, include); | ||
} | ||
@@ -77,6 +93,9 @@ | ||
* setting determines the default selection state. | ||
* @param {Selection|Selection[]} [options.include] Upstream selections whose | ||
* clauses should be included as part of the new selection. Any clauses | ||
* published to upstream selections will be relayed to the new selection. | ||
* @returns {Selection} The new Selection instance. | ||
*/ | ||
static crossfilter({ empty = false } = {}) { | ||
return new Selection(new SelectionResolver({ cross: true, empty })); | ||
static crossfilter({ empty = false, include = [] } = {}) { | ||
return create({ cross: true, empty }, include); | ||
} | ||
@@ -86,9 +105,19 @@ | ||
* Create a new Selection instance. | ||
* @param {SelectionResolver} resolver The selection resolution | ||
* @param {SelectionResolver} [resolver] The selection resolution | ||
* strategy to apply. | ||
* @param {Selection[]} [include] Upstream selections whose clauses | ||
* should be included as part of this selection. Any clauses published | ||
* to these upstream selections will be relayed to this selection. | ||
*/ | ||
constructor(resolver = new SelectionResolver()) { | ||
constructor(resolver = new SelectionResolver(), include = []) { | ||
super([]); | ||
this._resolved = this._value; | ||
this._resolver = resolver; | ||
/** @type {Set<Selection>} */ | ||
this._relay = new Set; | ||
if (Array.isArray(include)) { | ||
for (const sel of include) { | ||
sel._relay.add(this); | ||
} | ||
} | ||
} | ||
@@ -170,2 +199,3 @@ | ||
this.emit('activate', clause); | ||
this._relay.forEach(sel => sel.activate(clause)); | ||
} | ||
@@ -183,2 +213,3 @@ | ||
this._resolved.active = clause; | ||
this._relay.forEach(sel => sel.update(clause)); | ||
return super.update(this._resolved); | ||
@@ -185,0 +216,0 @@ } |
import { Query, asRelation, count, isNull, max, min, sql } from '@uwdata/mosaic-sql'; | ||
import { jsType } from './js-type.js'; | ||
import { convertArrowValue } from './convert-arrow.js'; | ||
@@ -55,3 +54,3 @@ export const Count = 'count'; | ||
// query for summary stats | ||
const result = await mc.query( | ||
const [result] = await mc.query( | ||
summarize(table, column, stats), | ||
@@ -61,11 +60,4 @@ { persist: true } | ||
// extract summary stats, copy to field info | ||
for (let i = 0; i < result.numCols; ++i) { | ||
const { name } = result.schema.fields[i]; | ||
const child = result.getChildAt(i); | ||
const convert = convertArrowValue(child.type); | ||
info[name] = convert(child.get(0)); | ||
} | ||
return info; | ||
// extract summary stats, copy to field info, and return | ||
return Object.assign(info, result); | ||
} | ||
@@ -72,0 +64,0 @@ |
@@ -14,3 +14,3 @@ import { Query, agg, sql } from '@uwdata/mosaic-sql'; | ||
const q = client.query(); | ||
const from = getBaseTable(q); | ||
const from = getBase(q, q => q.from()?.[0].from.table); | ||
@@ -24,2 +24,9 @@ // bail if no base table or the query is not analyzable | ||
const avg = ref => { | ||
const name = ref.column; | ||
// @ts-ignore | ||
const expr = getBase(q, q => q.select().find(c => c.as === name)?.expr); | ||
return `(SELECT AVG(${expr ?? ref}) FROM "${from}")`; | ||
}; | ||
for (const entry of q.select()) { | ||
@@ -51,7 +58,7 @@ const { as, expr: { aggregate, args } } = entry; | ||
aux[as] = null; | ||
aggr.push({ [as]: varianceExpr(aux, args[0], from) }); | ||
aggr.push({ [as]: varianceExpr(aux, args[0], avg) }); | ||
break; | ||
case 'VAR_POP': | ||
aux[as] = null; | ||
aggr.push({ [as]: varianceExpr(aux, args[0], from, false) }); | ||
aggr.push({ [as]: varianceExpr(aux, args[0], avg, false) }); | ||
break; | ||
@@ -61,19 +68,19 @@ case 'STDDEV': | ||
aux[as] = null; | ||
aggr.push({ [as]: agg`SQRT(${varianceExpr(aux, args[0], from)})` }); | ||
aggr.push({ [as]: agg`SQRT(${varianceExpr(aux, args[0], avg)})` }); | ||
break; | ||
case 'STDDEV_POP': | ||
aux[as] = null; | ||
aggr.push({ [as]: agg`SQRT(${varianceExpr(aux, args[0], from, false)})` }); | ||
aggr.push({ [as]: agg`SQRT(${varianceExpr(aux, args[0], avg, false)})` }); | ||
break; | ||
case 'COVAR_SAMP': | ||
aux[as] = null; | ||
aggr.push({ [as]: covarianceExpr(aux, args, from) }); | ||
aggr.push({ [as]: covarianceExpr(aux, args, avg) }); | ||
break; | ||
case 'COVAR_POP': | ||
aux[as] = null; | ||
aggr.push({ [as]: covarianceExpr(aux, args, from, false) }); | ||
aggr.push({ [as]: covarianceExpr(aux, args, avg, false) }); | ||
break; | ||
case 'CORR': | ||
aux[as] = null; | ||
aggr.push({ [as]: corrExpr(aux, args, from) }); | ||
aggr.push({ [as]: corrExpr(aux, args, avg) }); | ||
break; | ||
@@ -96,23 +103,23 @@ | ||
aux[as] = null; | ||
aggr.push({ [as]: regrVarExpr(aux, 0, args, from) }); | ||
aggr.push({ [as]: regrVarExpr(aux, 0, args, avg) }); | ||
break; | ||
case 'REGR_SXX': | ||
aux[as] = null; | ||
aggr.push({ [as]: regrVarExpr(aux, 1, args, from) }); | ||
aggr.push({ [as]: regrVarExpr(aux, 1, args, avg) }); | ||
break; | ||
case 'REGR_SXY': | ||
aux[as] = null; | ||
aggr.push({ [as]: covarianceExpr(aux, args, from, null) }); | ||
aggr.push({ [as]: covarianceExpr(aux, args, avg, null) }); | ||
break; | ||
case 'REGR_SLOPE': | ||
aux[as] = null; | ||
aggr.push({ [as]: regrSlopeExpr(aux, args, from) }); | ||
aggr.push({ [as]: regrSlopeExpr(aux, args, avg) }); | ||
break; | ||
case 'REGR_INTERCEPT': | ||
aux[as] = null; | ||
aggr.push({ [as]: regrInterceptExpr(aux, args, from) }); | ||
aggr.push({ [as]: regrInterceptExpr(aux, args, avg) }); | ||
break; | ||
case 'REGR_R2': | ||
aux[as] = null; | ||
aggr.push({ [as]: agg`(${corrExpr(aux, args, from)}) ** 2` }); | ||
aggr.push({ [as]: agg`(${corrExpr(aux, args, avg)}) ** 2` }); | ||
break; | ||
@@ -171,25 +178,26 @@ | ||
/** | ||
* Identify a single base (source) table of a query. | ||
* Identify a shared base (source) query and extract a value from it. | ||
* This method is used to find a shared base table name or extract | ||
* the original column name within a base table. | ||
* @param {Query} query The input query. | ||
* @returns {string | undefined | NaN} the base table name, or | ||
* @param {(q: Query) => any} get A getter function to extract | ||
* a value from a base query. | ||
* @returns {string | undefined | NaN} the base query value, or | ||
* `undefined` if there is no source table, or `NaN` if the | ||
* query operates over multiple source tables. | ||
*/ | ||
function getBaseTable(query) { | ||
function getBase(query, get) { | ||
const subq = query.subqueries; | ||
// select query | ||
if (query.select) { | ||
const from = query.from(); | ||
// @ts-ignore | ||
if (!from.length) return undefined; | ||
if (subq.length === 0) return from[0].from.table; | ||
if (query.select && subq.length === 0) { | ||
return get(query); | ||
} | ||
// handle set operations / subqueries | ||
const base = getBaseTable(subq[0]); | ||
const base = getBase(subq[0], get); | ||
for (let i = 1; i < subq.length; ++i) { | ||
const from = getBaseTable(subq[i]); | ||
if (from === undefined) continue; | ||
if (from !== base) return NaN; | ||
const value = getBase(subq[i], get); | ||
if (value === undefined) continue; | ||
if (value !== base) return NaN; | ||
} | ||
@@ -234,13 +242,2 @@ return base; | ||
/** | ||
* Generate a scalar subquery for a global average. | ||
* This value can be used to mean-center data. | ||
* @param {*} x Souce data table column. | ||
* @param {string} from The source data table name. | ||
* @returns A scalar aggregate query | ||
*/ | ||
function avg(x, from) { | ||
return sql`(SELECT AVG(${x}) FROM "${from}")`; | ||
} | ||
/** | ||
* Generate an expression for calculating argmax over data partitions. | ||
@@ -293,3 +290,3 @@ * As a side effect, this method adds a column to the input *aux* object | ||
* column reference, SQL expression, or other string-coercible value. | ||
* @param {string} from The source data table name. | ||
* @param {(field: any) => string} avg Global average query generator. | ||
* @param {boolean} [correction=true] A flag for whether a Bessel | ||
@@ -301,7 +298,7 @@ * correction should be applied to compute the sample variance | ||
*/ | ||
function varianceExpr(aux, x, from, correction = true) { | ||
function varianceExpr(aux, x, avg, correction = true) { | ||
const n = countExpr(aux, x); | ||
const ssq = auxName('rssq', x); // residual sum of squares | ||
const sum = auxName('rsum', x); // residual sum | ||
const delta = sql`${x} - ${avg(x, from)}`; | ||
const delta = sql`${x} - ${avg(x)}`; | ||
aux[ssq] = agg`SUM((${delta}) ** 2)`; | ||
@@ -324,3 +321,3 @@ aux[sum] = agg`SUM(${delta})`; | ||
* column references, SQL expressions, or other string-coercible values. | ||
* @param {string} from The source data table name. | ||
* @param {(field: any) => string} avg Global average query generator. | ||
* @param {boolean|null} [correction=true] A flag for whether a Bessel | ||
@@ -333,7 +330,7 @@ * correction should be applied to compute the sample covariance rather | ||
*/ | ||
function covarianceExpr(aux, args, from, correction = true) { | ||
function covarianceExpr(aux, args, avg, correction = true) { | ||
const n = regrCountExpr(aux, args); | ||
const sxy = regrSumXYExpr(aux, args, from); | ||
const sx = regrSumExpr(aux, 1, args, from); | ||
const sy = regrSumExpr(aux, 0, args, from); | ||
const sxy = regrSumXYExpr(aux, args, avg); | ||
const sx = regrSumExpr(aux, 1, args, avg); | ||
const sy = regrSumExpr(aux, 0, args, avg); | ||
const adj = correction === null ? '' // do not divide by count | ||
@@ -357,13 +354,13 @@ : correction ? ` / (${n} - 1)` // Bessel correction (sample) | ||
* column references, SQL expressions, or other string-coercible values. | ||
* @param {string} from The source data table name. | ||
* @param {(field: any) => string} avg Global average query generator. | ||
* @returns An aggregate expression for calculating correlation over | ||
* pre-aggregated data partitions. | ||
*/ | ||
function corrExpr(aux, args, from) { | ||
function corrExpr(aux, args, avg) { | ||
const n = regrCountExpr(aux, args); | ||
const sxy = regrSumXYExpr(aux, args, from); | ||
const sxx = regrSumSqExpr(aux, 1, args, from); | ||
const syy = regrSumSqExpr(aux, 0, args, from); | ||
const sx = regrSumExpr(aux, 1, args, from); | ||
const sy = regrSumExpr(aux, 0, args, from); | ||
const sxy = regrSumXYExpr(aux, args, avg); | ||
const sxx = regrSumSqExpr(aux, 1, args, avg); | ||
const syy = regrSumSqExpr(aux, 0, args, avg); | ||
const sx = regrSumExpr(aux, 1, args, avg); | ||
const sy = regrSumExpr(aux, 0, args, avg); | ||
const vx = agg`(${sxx} - (${sx} ** 2) / ${n})`; | ||
@@ -402,10 +399,10 @@ const vy = agg`(${syy} - (${sy} ** 2) / ${n})`; | ||
* column references, SQL expressions, or other string-coercible values. | ||
* @param {string} from The source data table name. | ||
* @param {(field: any) => string} avg Global average query generator. | ||
* @returns An aggregate expression over pre-aggregated data partitions. | ||
*/ | ||
function regrSumExpr(aux, i, args, from) { | ||
function regrSumExpr(aux, i, args, avg) { | ||
const v = args[i]; | ||
const o = args[1 - i]; | ||
const sum = auxName('rs', v); | ||
aux[sum] = agg`SUM(${v} - ${avg(v, from)}) FILTER (${o} IS NOT NULL)`; | ||
aux[sum] = agg`SUM(${v} - ${avg(v)}) FILTER (${o} IS NOT NULL)`; | ||
return agg`SUM(${sum})` | ||
@@ -425,10 +422,10 @@ } | ||
* column references, SQL expressions, or other string-coercible values. | ||
* @param {string} from The source data table name. | ||
* @param {(field: any) => string} avg Global average query generator. | ||
* @returns An aggregate expression over pre-aggregated data partitions. | ||
*/ | ||
function regrSumSqExpr(aux, i, args, from) { | ||
function regrSumSqExpr(aux, i, args, avg) { | ||
const v = args[i]; | ||
const u = args[1 - i]; | ||
const ssq = auxName('rss', v); | ||
aux[ssq] = agg`SUM((${v} - ${avg(v, from)}) ** 2) FILTER (${u} IS NOT NULL)`; | ||
aux[ssq] = agg`SUM((${v} - ${avg(v)}) ** 2) FILTER (${u} IS NOT NULL)`; | ||
return agg`SUM(${ssq})` | ||
@@ -447,9 +444,9 @@ } | ||
* column references, SQL expressions, or other string-coercible values. | ||
* @param {string} from The source data table name. | ||
* @param {(field: any) => string} avg Global average query generator. | ||
* @returns An aggregate expression over pre-aggregated data partitions. | ||
*/ | ||
function regrSumXYExpr(aux, args, from) { | ||
function regrSumXYExpr(aux, args, avg) { | ||
const [y, x] = args; | ||
const sxy = auxName('sxy', y, x); | ||
aux[sxy] = agg`SUM((${x} - ${avg(x, from)}) * (${y} - ${avg(y, from)}))`; | ||
aux[sxy] = agg`SUM((${x} - ${avg(x)}) * (${y} - ${avg(y)}))`; | ||
return agg`SUM(${sxy})`; | ||
@@ -507,10 +504,10 @@ } | ||
* column references, SQL expressions, or other string-coercible values. | ||
* @param {string} from The source data table name. | ||
* @param {(field: any) => string} avg Global average query generator. | ||
* @returns An aggregate expression for calculating variance over | ||
* pre-aggregated data partitions. | ||
*/ | ||
function regrVarExpr(aux, i, args, from) { | ||
function regrVarExpr(aux, i, args, avg) { | ||
const n = regrCountExpr(aux, args); | ||
const sum = regrSumExpr(aux, i, args, from); | ||
const ssq = regrSumSqExpr(aux, i, args, from); | ||
const sum = regrSumExpr(aux, i, args, avg); | ||
const ssq = regrSumSqExpr(aux, i, args, avg); | ||
return agg`(${ssq} - (${sum} ** 2 / ${n}))`; | ||
@@ -528,9 +525,9 @@ } | ||
* column references, SQL expressions, or other string-coercible values. | ||
* @param {string} from The source data table name. | ||
* @param {(field: any) => string} avg Global average query generator. | ||
* @returns An aggregate expression for calculating regression slopes over | ||
* pre-aggregated data partitions. | ||
*/ | ||
function regrSlopeExpr(aux, args, from) { | ||
const cov = covarianceExpr(aux, args, from, null); | ||
const varx = regrVarExpr(aux, 1, args, from); | ||
function regrSlopeExpr(aux, args, avg) { | ||
const cov = covarianceExpr(aux, args, avg, null); | ||
const varx = regrVarExpr(aux, 1, args, avg); | ||
return agg`(${cov}) / ${varx}`; | ||
@@ -548,11 +545,11 @@ } | ||
* column references, SQL expressions, or other string-coercible values. | ||
* @param {string} from The source data table name. | ||
* @param {(field: any) => string} avg Global average query generator. | ||
* @returns An aggregate expression for calculating regression intercepts over | ||
* pre-aggregated data partitions. | ||
*/ | ||
function regrInterceptExpr(aux, args, from) { | ||
function regrInterceptExpr(aux, args, avg) { | ||
const ax = regrAvgXExpr(aux, args); | ||
const ay = regrAvgYExpr(aux, args); | ||
const m = regrSlopeExpr(aux, args, from); | ||
const m = regrSlopeExpr(aux, args, avg); | ||
return agg`${ay} - (${m}) * ${ax}`; | ||
} |
@@ -1,85 +0,84 @@ | ||
/** | ||
* Create a new priority queue instance. | ||
* @param {number} ranks An integer number of rank-order priority levels. | ||
* @returns A priority queue instance. | ||
*/ | ||
export function priorityQueue(ranks) { | ||
// one list for each integer priority level | ||
const queue = Array.from( | ||
export class PriorityQueue { | ||
/** | ||
* Create a new priority queue instance. | ||
* @param {number} ranks An integer number of rank-order priority levels. | ||
*/ | ||
constructor(ranks) { | ||
// one list for each integer priority level | ||
this.queue = Array.from( | ||
{ length: ranks }, | ||
() => ({ head: null, tail: null }) | ||
); | ||
} | ||
return { | ||
/** | ||
* Indicate if the queue is empty. | ||
* @returns [boolean] true if empty, false otherwise. | ||
*/ | ||
isEmpty() { | ||
return queue.every(list => !list.head); | ||
}, | ||
/** | ||
* Indicate if the queue is empty. | ||
* @returns {boolean} true if empty, false otherwise. | ||
*/ | ||
isEmpty() { | ||
return this.queue.every(list => !list.head); | ||
} | ||
/** | ||
* Insert an item into the queue with a given priority rank. | ||
* @param {*} item The item to add. | ||
* @param {number} rank The integer priority rank. | ||
* Priority ranks are integers starting at zero. | ||
* Lower ranks indicate higher priority. | ||
*/ | ||
insert(item, rank) { | ||
const list = queue[rank]; | ||
if (!list) { | ||
throw new Error(`Invalid queue priority rank: ${rank}`); | ||
} | ||
/** | ||
* Insert an item into the queue with a given priority rank. | ||
* @param {*} item The item to add. | ||
* @param {number} rank The integer priority rank. | ||
* Priority ranks are integers starting at zero. | ||
* Lower ranks indicate higher priority. | ||
*/ | ||
insert(item, rank) { | ||
const list = this.queue[rank]; | ||
if (!list) { | ||
throw new Error(`Invalid queue priority rank: ${rank}`); | ||
} | ||
const node = { item, next: null }; | ||
if (list.head === null) { | ||
list.head = list.tail = node; | ||
} else { | ||
list.tail = (list.tail.next = node); | ||
} | ||
}, | ||
const node = { item, next: null }; | ||
if (list.head === null) { | ||
list.head = list.tail = node; | ||
} else { | ||
list.tail = list.tail.next = node; | ||
} | ||
} | ||
/** | ||
* Remove a set of items from the queue, regardless of priority rank. | ||
* If a provided item is not in the queue it will be ignored. | ||
* @param {(item: *) => boolean} test A predicate function to test | ||
* if an item should be removed (true to drop, false to keep). | ||
*/ | ||
remove(test) { | ||
for (const list of queue) { | ||
let { head, tail } = list; | ||
for (let prev = null, curr = head; curr; prev = curr, curr = curr.next) { | ||
if (test(curr.item)) { | ||
if (curr === head) { | ||
head = curr.next; | ||
} else { | ||
prev.next = curr.next; | ||
} | ||
if (curr === tail) tail = prev || head; | ||
} | ||
} | ||
list.head = head; | ||
list.tail = tail; | ||
} | ||
}, | ||
/** | ||
* Remove a set of items from the queue, regardless of priority rank. | ||
* If a provided item is not in the queue it will be ignored. | ||
* @param {(item: *) => boolean} test A predicate function to test | ||
* if an item should be removed (true to drop, false to keep). | ||
*/ | ||
remove(test) { | ||
for (const list of this.queue) { | ||
let { head, tail } = list; | ||
for (let prev = null, curr = head; curr; prev = curr, curr = curr.next) { | ||
if (test(curr.item)) { | ||
if (curr === head) { | ||
head = curr.next; | ||
} else { | ||
prev.next = curr.next; | ||
} | ||
if (curr === tail) tail = prev || head; | ||
} | ||
} | ||
list.head = head; | ||
list.tail = tail; | ||
} | ||
} | ||
/** | ||
* Remove and return the next highest priority item. | ||
* @returns {*} The next item in the queue, | ||
* or undefined if this queue is empty. | ||
*/ | ||
next() { | ||
for (const list of queue) { | ||
const { head } = list; | ||
if (head !== null) { | ||
list.head = head.next; | ||
if (list.tail === head) { | ||
list.tail = null; | ||
} | ||
return head.item; | ||
} | ||
} | ||
} | ||
}; | ||
/** | ||
* Remove and return the next highest priority item. | ||
* @returns {*} The next item in the queue, | ||
* or undefined if this queue is empty. | ||
*/ | ||
next() { | ||
for (const list of this.queue) { | ||
const { head } = list; | ||
if (head !== null) { | ||
list.head = head.next; | ||
if (list.tail === head) { | ||
list.tail = null; | ||
} | ||
return head.item; | ||
} | ||
} | ||
} | ||
} |
const NIL = {}; | ||
/** | ||
* Throttle invocations of a callback function. The callback must return | ||
* a Promise. Upon repeated invocation, the callback will not be invoked | ||
* until a prior Promise resolves. If multiple invocations occurs while | ||
* waiting, only the most recent invocation will be pending. | ||
* @param {(event: *) => Promise} callback The callback function. | ||
* @param {boolean} [debounce=true] Flag indicating if invocations | ||
* should also be debounced within the current animation frame. | ||
* @returns A new function that throttles access to the callback. | ||
*/ | ||
export function throttle(callback, debounce = false) { | ||
@@ -9,3 +19,3 @@ let curr; | ||
function invoke(event) { | ||
curr = callback(event).then(() => { | ||
curr = callback(event).finally(() => { | ||
if (next) { | ||
@@ -12,0 +22,0 @@ const { value } = next; |
@@ -1,2 +0,2 @@ | ||
import { convertArrowColumn, isArrowTable } from './convert-arrow.js'; | ||
import { isArrowTable } from './is-arrow-table.js'; | ||
@@ -30,19 +30,8 @@ /** | ||
* Convert an Arrow table to a set of column arrays. | ||
* @param {import('apache-arrow').Table} data An Apache Arrow Table. | ||
* @param {import('@uwdata/flechette').Table} data An Arrow Table. | ||
* @returns {DataColumns} An object with named column arrays. | ||
*/ | ||
function arrowToColumns(data) { | ||
const { numRows, numCols, schema: { fields } } = data; | ||
const columns = {}; | ||
for (let col = 0; col < numCols; ++col) { | ||
const name = fields[col].name; | ||
if (columns[name]) { | ||
console.warn(`Redundant column name "${name}". Skipping...`); | ||
} else { | ||
columns[name] = convertArrowColumn(data.getChildAt(col)); | ||
} | ||
} | ||
return { numRows, columns }; | ||
const { numRows } = data; | ||
return { numRows, columns: data.toColumns() }; | ||
} | ||
@@ -49,0 +38,0 @@ |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
33
995751
22175
+ Added@uwdata/flechette@^1.0.2
+ Added@uwdata/flechette@1.1.1(transitive)
+ Added@uwdata/mosaic-sql@0.11.0(transitive)
- Removedapache-arrow@^16.1.0
- Removed@uwdata/mosaic-sql@0.10.0(transitive)
- Removedapache-arrow@16.1.0(transitive)
Updated@uwdata/mosaic-sql@^0.11.0