nodejs-polars
Comparing version 0.2.1 to 0.3.0
@@ -300,2 +300,24 @@ /// <reference types="node" /> | ||
/** | ||
* | ||
* | ||
* __Extend the memory backed by this `DataFrame` with the values from `other`.__ | ||
* ___ | ||
* Different from `vstack`, which adds the chunks from `other` to the chunks of this `DataFrame`, | ||
* `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation. | ||
* If this does not cause a reallocation, the resulting data structure will not have any extra chunks | ||
* and thus will yield faster queries. | ||
* Prefer `extend` over `vstack` when you want to do a query after a single append. For instance, during | ||
* online operations where you add `n` rows and rerun a query. | ||
* Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance, | ||
* when you read in multiple files and want to store them in a single `DataFrame`. | ||
* In the latter case, finish the sequence of `vstack` operations with a `rechunk`. | ||
* @param other DataFrame to vertically add. | ||
*/ | ||
extend(other: DataFrame): DataFrame; | ||
/** | ||
* Fill null/missing values by a filling strategy | ||
@@ -302,0 +324,0 @@ * |
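A minimal sketch of `extend` versus `vstack` in practice (assuming the package's top-level `pl` export and the standard `DataFrame` constructor):

```
const pl = require("nodejs-polars");

const df = pl.DataFrame({ a: [1, 2], b: ["x", "y"] });
const other = pl.DataFrame({ a: [3], b: ["z"] });

// Single append followed by a query: prefer extend.
const extended = df.extend(other);
console.log(extended.shape); // { height: 3, width: 2 }

// Many appends before querying: prefer vstack, then rechunk once at the end.
const stacked = df.vstack(other).vstack(other).rechunk();
```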
@@ -180,2 +180,5 @@ "use strict"; | ||
}, | ||
extend(other) { | ||
return wrap("extend", { other: other._df }); | ||
}, | ||
filter(predicate) { | ||
@@ -182,0 +185,0 @@ return this |
@@ -5,3 +5,3 @@ import * as series from "./series/series"; | ||
import * as func from "./functions"; | ||
import io from "./io"; | ||
import * as io from "./io"; | ||
import * as cfg from "./cfg"; | ||
@@ -8,0 +8,0 @@ import type { FillNullStrategy as _FillNullStrategy } from "./utils"; |
@@ -21,5 +21,2 @@ "use strict"; | ||
}; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
const series = __importStar(require("./series/series")); | ||
@@ -29,3 +26,3 @@ const df = __importStar(require("./dataframe")); | ||
const func = __importStar(require("./functions")); | ||
const io_1 = __importDefault(require("./io")); | ||
const io = __importStar(require("./io")); | ||
const cfg = __importStar(require("./cfg")); | ||
@@ -61,11 +58,11 @@ const package_json_1 = require("../package.json"); | ||
// IO | ||
pl.scanCSV = io_1.default.scanCSV; | ||
pl.scanIPC = io_1.default.scanIPC; | ||
pl.scanParquet = io_1.default.scanParquet; | ||
pl.readCSV = io_1.default.readCSV; | ||
pl.readIPC = io_1.default.readIPC; | ||
pl.readJSON = io_1.default.readJSON; | ||
pl.readParquet = io_1.default.readParquet; | ||
pl.readCSVStream = io_1.default.readCSVStream; | ||
pl.readJSONStream = io_1.default.readJSONStream; | ||
pl.scanCSV = io.scanCSV; | ||
pl.scanIPC = io.scanIPC; | ||
pl.scanParquet = io.scanParquet; | ||
pl.readCSV = io.readCSV; | ||
pl.readIPC = io.readIPC; | ||
pl.readJSON = io.readJSON; | ||
pl.readParquet = io.readParquet; | ||
pl.readCSVStream = io.readCSVStream; | ||
pl.readJSONStream = io.readJSONStream; | ||
// lazy | ||
@@ -72,0 +69,0 @@ pl.col = lazy_1.funcs.col; |
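The `io` module is now consumed as a namespace import rather than a default export, matching the switch to named exports in `bin/io.js` below. A sketch of the difference from a consumer's point of view (the `pl.*` aliases themselves are unchanged):

```
// 0.2.1 compiled output: import io from "./io";       (single default export)
// 0.3.0 compiled output: import * as io from "./io";  (named exports)
const pl = require("nodejs-polars");
const df = pl.readCSV("a,b\n1,2"); // unaffected by the internal change
```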
bin/io.d.ts
@@ -16,3 +16,8 @@ /// <reference types="node" /> | ||
rechunk?: boolean; | ||
rowCount?: RowCount; | ||
}; | ||
declare type RowCount = { | ||
name: string; | ||
offset?: number; | ||
}; | ||
declare type ReadCsvOptions = { | ||
@@ -36,2 +41,3 @@ batchSize?: number; | ||
startRows?: number; | ||
rowCount?: RowCount; | ||
}; | ||
@@ -48,2 +54,3 @@ declare type ReadJsonOptions = { | ||
rechunk?: boolean; | ||
rowCount?: RowCount; | ||
}; | ||
@@ -54,277 +61,247 @@ declare type ReadIPCOptions = { | ||
numRows?: number; | ||
rowCount?: RowCount; | ||
}; | ||
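`rowCount` is new in 0.3.0 across the scan/read option types above. Judging from the type alone, it should add a row-index column named `name`, counting from `offset`; a hedged sketch (the file path is a placeholder):

```
const pl = require("nodejs-polars");

const df = pl.readCSV("./data.csv", {
  rowCount: { name: "row_nr", offset: 0 },
});
// df should now carry an extra "row_nr" index column.
```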
declare namespace io { | ||
/** | ||
* __Read a CSV file or string into a DataFrame.__ | ||
* ___ | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.csv`. | ||
* - body: String or buffer to be read as a CSV | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to `false`, the first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.endRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.startRows -Start reading after `startRows` position. | ||
* @param options.projection -Indices of columns to select. Note that column indices start at zero. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.columns -Columns to select. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.encoding -Allowed encodings: `utf8`, `utf8-lossy`. Lossy means that invalid utf8 values are replaced with `�` character. | ||
* @param options.numThreads -Number of threads to use in CSV parsing. Defaults to the number of physical CPUs of your system. | ||
* @param options.dtype -Overwrite the dtypes during inference. | ||
* @param options.lowMemory - Reduce memory usage at the expense of performance. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quotChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps a column name to a null value string, e.g. {"column_1": "0"} | ||
* @param options.parseDates -Whether to attempt to parse dates or not | ||
* @returns DataFrame | ||
*/ | ||
interface readCSV { | ||
(pathOrBody: string | Buffer, options?: Partial<ReadCsvOptions>): DataFrame; | ||
} | ||
/** | ||
* __Lazily read from a CSV file or multiple files via glob patterns.__ | ||
* | ||
* This allows the query optimizer to push down predicates and | ||
* projections to the scan level, thereby potentially reducing | ||
* memory overhead. | ||
* ___ | ||
* @param path path to a file | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to `false`, the first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quotChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.startRows -Start reading after `startRows` position. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps a column name to a null value string, e.g. {"column_1": "0"} | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.endRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.lowMemory - Reduce memory usage at the expense of performance. | ||
* ___ | ||
* | ||
*/ | ||
interface scanCSV { | ||
(path: string, options?: Partial<ReadCsvOptions>): LazyDataFrame; | ||
} | ||
/** | ||
* __Read a JSON file or string into a DataFrame.__ | ||
* | ||
* _Note: Currently only newline delimited JSON is supported_ | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.json`. | ||
* - body: String or buffer to be read as JSON | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @returns ({@link DataFrame}) | ||
* @example | ||
* ``` | ||
* const jsonString = ` | ||
* {"a", 1, "b", "foo", "c": 3} | ||
* {"a": 2, "b": "bar", "c": 6} | ||
* ` | ||
* > const df = pl.readJSON(jsonString) | ||
* > console.log(df) | ||
* shape: (2, 3) | ||
* ╭─────┬─────┬─────╮ | ||
* │ a ┆ b ┆ c │ | ||
* │ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ str ┆ i64 │ | ||
* ╞═════╪═════╪═════╡ | ||
* │ 1 ┆ foo ┆ 3 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ bar ┆ 6 │ | ||
* ╰─────┴─────┴─────╯ | ||
* ``` | ||
*/ | ||
interface readJSON { | ||
(pathOrBody: string | Buffer, options?: Partial<ReadJsonOptions>): DataFrame; | ||
} | ||
/** | ||
* Read into a DataFrame from a parquet file. | ||
* @param pathOrBody | ||
* Path to a file, a list of files, or a file-like object. If the path is a directory, it will be used | ||
* as a partition-aware scan. | ||
* @param options.columns Columns to select. Accepts a list of column names. | ||
* @param options.numRows Stop reading from the parquet file after reading `numRows` rows. | ||
* @param options.parallel Read the parquet file in parallel. The single threaded reader consumes less memory. | ||
*/ | ||
interface readParquet { | ||
(pathOrBody: string | Buffer, options?: ReadParquetOptions): DataFrame; | ||
} | ||
/** | ||
* __Lazily read from a parquet file or multiple files via glob patterns.__ | ||
* ___ | ||
* This allows the query optimizer to push down predicates and projections to the scan level, | ||
* thereby potentially reducing memory overhead. | ||
* @param path Path to a file or a glob pattern | ||
* @param options.numRows Stop reading from the parquet file after reading `numRows` rows. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.parallel Read the parquet file in parallel. The single threaded reader consumes less memory. | ||
* @param options.rechunk In case of reading multiple files via a glob pattern, rechunk the final DataFrame into contiguous memory chunks. | ||
*/ | ||
interface scanParquet { | ||
(path: string, options?: ScanParquetOptions): LazyDataFrame; | ||
} | ||
/** | ||
* __Read into a DataFrame from Arrow IPC (Feather v2) file.__ | ||
* ___ | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.ipc`. | ||
* - body: String or buffer to be read as Arrow IPC | ||
* @param options.columns Columns to select. Accepts a list of column names. | ||
* @param options.numRows Stop reading from the IPC file after reading `numRows` rows. | ||
*/ | ||
interface readIPC { | ||
(pathOrBody: string | Buffer, options?: ReadIPCOptions): DataFrame; | ||
} | ||
/** | ||
* __Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.__ | ||
* ___ | ||
* @param path Path to an IPC file. | ||
* @param options.numRows Stop reading from the IPC file after reading `numRows` rows. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.rechunk Reallocate to contiguous memory when all chunks/files are parsed. | ||
*/ | ||
interface scanIPC { | ||
(path: string, options?: ScanIPCOptions): LazyDataFrame; | ||
} | ||
/** | ||
* __Read a stream into a DataFrame.__ | ||
* | ||
* **Warning:** this is much slower than `scanCSV` or `readCSV` | ||
* | ||
* This will consume the entire stream into a single buffer and then call `readCSV`. | ||
* Only use it when you must consume from a stream, or when performance is not a major consideration. | ||
* | ||
* ___ | ||
* @param stream - readable stream containing csv data | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to `false`, the first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.endRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.startRows -Start reading after `startRows` position. | ||
* @param options.projection -Indices of columns to select. Note that column indices start at zero. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.columns -Columns to select. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.encoding -Allowed encodings: `utf8`, `utf8-lossy`. Lossy means that invalid utf8 values are replaced with `�` character. | ||
* @param options.numThreads -Number of threads to use in CSV parsing. Defaults to the number of physical CPUs of your system. | ||
* @param options.dtype -Overwrite the dtypes during inference. | ||
* @param options.lowMemory - Reduce memory usage at the expense of performance. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quotChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps a column name to a null value string, e.g. {"column_1": "0"} | ||
* @param options.parseDates -Whether to attempt to parse dates or not | ||
* @returns Promise<DataFrame> | ||
* | ||
* @example | ||
* ``` | ||
* >>> const readStream = new Stream.Readable({read(){}}); | ||
* >>> readStream.push(`a,b\n`); | ||
* >>> readStream.push(`1,2\n`); | ||
* >>> readStream.push(`2,2\n`); | ||
* >>> readStream.push(`3,2\n`); | ||
* >>> readStream.push(`4,2\n`); | ||
* >>> readStream.push(null); | ||
* | ||
* >>> pl.readCSVStream(readStream).then(df => console.log(df)); | ||
* shape: (4, 2) | ||
* ┌─────┬─────┐ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ i64 │ | ||
* ╞═════╪═════╡ | ||
* │ 1 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 3 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 4 ┆ 2 │ | ||
* └─────┴─────┘ | ||
* ``` | ||
*/ | ||
interface readCSVStream { | ||
(stream: Readable, options?: ReadCsvOptions): Promise<DataFrame>; | ||
} | ||
/** | ||
* __Read a newline delimited JSON stream into a DataFrame.__ | ||
* | ||
* @param stream - readable stream containing json data | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* Note: this is done per batch | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @example | ||
* ``` | ||
* >>> const readStream = new Stream.Readable({read(){}}); | ||
* >>> readStream.push(`${JSON.stringify({a: 1, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 2, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 3, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 4, b: 2})} \n`); | ||
* >>> readStream.push(null); | ||
* | ||
* >>> pl.readJSONStream(readStream).then(df => console.log(df)); | ||
* shape: (4, 2) | ||
* ┌─────┬─────┐ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ i64 │ | ||
* ╞═════╪═════╡ | ||
* │ 1 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 3 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 4 ┆ 2 │ | ||
* └─────┴─────┘ | ||
* ``` | ||
*/ | ||
interface readJSONStream { | ||
(stream: Readable, options?: ReadJsonOptions): Promise<DataFrame>; | ||
} | ||
} | ||
declare namespace io { | ||
function readCSV(pathOrBody: any, options?: any): DataFrame; | ||
function scanCSV(path: any, options?: any): LazyDataFrame; | ||
function readJSON(pathOrBody: any, options?: any): DataFrame; | ||
function readParquet(pathOrBody: any, options?: any): DataFrame; | ||
function scanParquet(path: any, options?: any): LazyDataFrame; | ||
function readIPC(pathOrBody: any, options?: any): DataFrame; | ||
function scanIPC(path: any, options?: any): LazyDataFrame; | ||
function readCSVStream(stream: any, options?: any): Promise<unknown>; | ||
function readJSONStream(stream: any, options?: any): Promise<unknown>; | ||
} | ||
export = io; | ||
/** | ||
* __Read a CSV file or string into a DataFrame.__ | ||
* ___ | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.csv`. | ||
* - body: String or buffer to be read as a CSV | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to `false`, the first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.endRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.startRows -Start reading after `startRows` position. | ||
* @param options.projection -Indices of columns to select. Note that column indices start at zero. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.columns -Columns to select. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.encoding -Allowed encodings: `utf8`, `utf8-lossy`. Lossy means that invalid utf8 values are replaced with `�` character. | ||
* @param options.numThreads -Number of threads to use in CSV parsing. Defaults to the number of physical CPUs of your system. | ||
* @param options.dtype -Overwrite the dtypes during inference. | ||
* @param options.lowMemory - Reduce memory usage at the expense of performance. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quotChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps a column name to a null value string, e.g. {"column_1": "0"} | ||
* @param options.parseDates -Whether to attempt to parse dates or not | ||
* @returns DataFrame | ||
*/ | ||
export declare function readCSV(pathOrBody: string | Buffer, options?: Partial<ReadCsvOptions>): DataFrame; | ||
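For instance, reading an inline CSV body with a few of the documented options (a sketch; a string that is not a valid file path is treated as the body, per `pathOrBody` above):

```
const pl = require("nodejs-polars");

const csv = "a,b\n1,x\n2,y";
const df = pl.readCSV(csv, { hasHeader: true, sep: "," });
console.log(df.shape); // { height: 2, width: 2 }
```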
/** | ||
* __Lazily read from a CSV file or multiple files via glob patterns.__ | ||
* | ||
* This allows the query optimizer to push down predicates and | ||
* projections to the scan level, thereby potentially reducing | ||
* memory overhead. | ||
* ___ | ||
* @param path path to a file | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to `false`, the first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quotChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.startRows -Start reading after `startRows` position. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps a column name to a null value string, e.g. {"column_1": "0"} | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.endRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.lowMemory - Reduce memory usage at the expense of performance. | ||
* ___ | ||
* | ||
*/ | ||
export declare function scanCSV(path: string, options?: Partial<ReadCsvOptions>): LazyDataFrame; | ||
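A sketch of a lazy pipeline on top of `scanCSV`, assuming the usual `LazyDataFrame` methods (`filter`, `select`, `collectSync`):

```
const pl = require("nodejs-polars");

const df = pl.scanCSV("./data.csv", { hasHeader: true })
  .filter(pl.col("a").gt(1)) // pushed down to the scan
  .select("a", "b")
  .collectSync();
```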
/** | ||
* __Read a JSON file or string into a DataFrame.__ | ||
* | ||
* _Note: Currently only newline delimited JSON is supported_ | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.json`. | ||
* - body: String or buffer to be read as JSON | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @returns ({@link DataFrame}) | ||
* @example | ||
* ``` | ||
* const jsonString = ` | ||
* {"a", 1, "b", "foo", "c": 3} | ||
* {"a": 2, "b": "bar", "c": 6} | ||
* ` | ||
* > const df = pl.readJSON(jsonString) | ||
* > console.log(df) | ||
* shape: (2, 3) | ||
* ╭─────┬─────┬─────╮ | ||
* │ a ┆ b ┆ c │ | ||
* │ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ str ┆ i64 │ | ||
* ╞═════╪═════╪═════╡ | ||
* │ 1 ┆ foo ┆ 3 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ bar ┆ 6 │ | ||
* ╰─────┴─────┴─────╯ | ||
* ``` | ||
*/ | ||
export declare function readJSON(pathOrBody: string | Buffer, options?: Partial<ReadJsonOptions>): DataFrame; | ||
/** | ||
* Read into a DataFrame from a parquet file. | ||
* @param pathOrBody | ||
* Path to a file, a list of files, or a file-like object. If the path is a directory, it will be used | ||
* as a partition-aware scan. | ||
* @param options.columns Columns to select. Accepts a list of column names. | ||
* @param options.numRows Stop reading from the parquet file after reading `numRows` rows. | ||
* @param options.parallel Read the parquet file in parallel. The single threaded reader consumes less memory. | ||
*/ | ||
export declare function readParquet(pathOrBody: string | Buffer, options?: ReadParquetOptions): DataFrame; | ||
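For example (a sketch; the path is a placeholder):

```
const pl = require("nodejs-polars");

const df = pl.readParquet("./data.parquet", {
  columns: ["a", "b"], // only materialize these columns
  numRows: 1000,       // stop after 1000 rows
});
```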
/** | ||
* __Lazily read from a parquet file or multiple files via glob patterns.__ | ||
* ___ | ||
* This allows the query optimizer to push down predicates and projections to the scan level, | ||
* thereby potentially reducing memory overhead. | ||
* @param path Path to a file or a glob pattern | ||
* @param options.numRows Stop reading from the parquet file after reading `numRows` rows. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.parallel Read the parquet file in parallel. The single threaded reader consumes less memory. | ||
* @param options.rechunk In case of reading multiple files via a glob pattern, rechunk the final DataFrame into contiguous memory chunks. | ||
*/ | ||
export declare function scanParquet(path: string, options?: ScanParquetOptions): LazyDataFrame; | ||
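A sketch of a lazy scan over a glob pattern with a pushed-down predicate (`isNotNull` is assumed to be available on expressions):

```
const pl = require("nodejs-polars");

const df = pl.scanParquet("./data/*.parquet", { rechunk: true })
  .filter(pl.col("a").isNotNull())
  .collectSync();
```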
/** | ||
* __Read into a DataFrame from Arrow IPC (Feather v2) file.__ | ||
* ___ | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.ipc`. | ||
* - body: String or buffer to be read as Arrow IPC | ||
* @param options.columns Columns to select. Accepts a list of column names. | ||
* @param options.numRows Stop reading from the IPC file after reading `numRows` rows. | ||
*/ | ||
export declare function readIPC(pathOrBody: string | Buffer, options?: ReadIPCOptions): DataFrame; | ||
/** | ||
* __Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.__ | ||
* ___ | ||
* @param path Path to an IPC file. | ||
* @param options.numRows Stop reading from the IPC file after reading `numRows` rows. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.rechunk Reallocate to contiguous memory when all chunks/files are parsed. | ||
*/ | ||
export declare function scanIPC(path: string, options?: ScanIPCOptions): LazyDataFrame; | ||
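The two IPC entry points side by side (a sketch; paths are placeholders):

```
const pl = require("nodejs-polars");

// Eager read of a Feather v2 file:
const df = pl.readIPC("./data.ipc", { numRows: 500 });

// Lazy scan; rechunk into contiguous memory once all chunks are parsed:
const lazy = pl.scanIPC("./data.ipc", { rechunk: true });
```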
/** | ||
* __Read a stream into a DataFrame.__ | ||
* | ||
* **Warning:** this is much slower than `scanCSV` or `readCSV` | ||
* | ||
* This will consume the entire stream into a single buffer and then call `readCSV`. | ||
* Only use it when you must consume from a stream, or when performance is not a major consideration. | ||
* | ||
* ___ | ||
* @param stream - readable stream containing csv data | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to `false`, the first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.endRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.startRows -Start reading after `startRows` position. | ||
* @param options.projection -Indices of columns to select. Note that column indices start at zero. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.columns -Columns to select. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.encoding -Allowed encodings: `utf8`, `utf8-lossy`. Lossy means that invalid utf8 values are replaced with `�` character. | ||
* @param options.numThreads -Number of threads to use in CSV parsing. Defaults to the number of physical CPUs of your system. | ||
* @param options.dtype -Overwrite the dtypes during inference. | ||
* @param options.lowMemory - Reduce memory usage at the expense of performance. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quotChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps a column name to a null value string, e.g. {"column_1": "0"} | ||
* @param options.parseDates -Whether to attempt to parse dates or not | ||
* @returns Promise<DataFrame> | ||
* | ||
* @example | ||
* ``` | ||
* >>> const readStream = new Stream.Readable({read(){}}); | ||
* >>> readStream.push(`a,b\n`); | ||
* >>> readStream.push(`1,2\n`); | ||
* >>> readStream.push(`2,2\n`); | ||
* >>> readStream.push(`3,2\n`); | ||
* >>> readStream.push(`4,2\n`); | ||
* >>> readStream.push(null); | ||
* | ||
* >>> pl.readCSVStream(readStream).then(df => console.log(df)); | ||
* shape: (4, 2) | ||
* ┌─────┬─────┐ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ i64 │ | ||
* ╞═════╪═════╡ | ||
* │ 1 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 3 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 4 ┆ 2 │ | ||
* └─────┴─────┘ | ||
* ``` | ||
*/ | ||
export declare function readCSVStream(stream: Readable, options?: ReadCsvOptions): Promise<DataFrame>; | ||
/** | ||
* __Read a newline delimited JSON stream into a DataFrame.__ | ||
* | ||
* @param stream - readable stream containing json data | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* Note: this is done per batch | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @example | ||
* ``` | ||
* >>> const readStream = new Stream.Readable({read(){}}); | ||
* >>> readStream.push(`${JSON.stringify({a: 1, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 2, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 3, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 4, b: 2})} \n`); | ||
* >>> readStream.push(null); | ||
* | ||
* >>> pl.readJSONStream(readStream).then(df => console.log(df)); | ||
* shape: (4, 2) | ||
* ┌─────┬─────┐ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ i64 │ | ||
* ╞═════╪═════╡ | ||
* │ 1 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 3 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 4 ┆ 2 │ | ||
* └─────┴─────┘ | ||
* ``` | ||
*/ | ||
export declare function readJSONStream(stream: Readable, options?: ReadJsonOptions): Promise<DataFrame>; | ||
export {}; |
bin/io.js
@@ -17,2 +17,4 @@ "use strict"; | ||
var _LineBatcher_lines, _LineBatcher_accumulatedLines, _LineBatcher_batchSize; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.readJSONStream = exports.readCSVStream = exports.scanIPC = exports.readIPC = exports.scanParquet = exports.readParquet = exports.readJSON = exports.scanCSV = exports.readCSV = void 0; | ||
const polars_internal_1 = __importDefault(require("./internals/polars_internal")); | ||
@@ -40,149 +42,2 @@ const dataframe_1 = require("./dataframe"); | ||
}; | ||
// Implementation | ||
var io; | ||
(function (io) { | ||
function readCSV(pathOrBody, options) { | ||
const extensions = [".tsv", ".csv"]; | ||
if (Buffer.isBuffer(pathOrBody)) { | ||
return readCSVBuffer(pathOrBody, options); | ||
} | ||
if (typeof pathOrBody === "string") { | ||
const inline = !(0, utils_1.isPath)(pathOrBody, extensions); | ||
if (inline) { | ||
return readCSVBuffer(Buffer.from(pathOrBody, "utf-8"), options); | ||
} | ||
else { | ||
return readCSVPath(pathOrBody, options); | ||
} | ||
} | ||
else { | ||
throw new Error("must supply either a path or body"); | ||
} | ||
} | ||
io.readCSV = readCSV; | ||
function scanCSV(path, options) { | ||
options = { ...readCsvDefaultOptions, ...options }; | ||
return (0, dataframe_2.LazyDataFrame)(polars_internal_1.default.ldf.scanCSV({ path, ...options })); | ||
} | ||
io.scanCSV = scanCSV; | ||
function readJSON(pathOrBody, options) { | ||
const extensions = [".ndjson", ".json", ".jsonl"]; | ||
if (Buffer.isBuffer(pathOrBody)) { | ||
return readJSONBuffer(pathOrBody, options); | ||
} | ||
if (typeof pathOrBody === "string") { | ||
const inline = !(0, utils_1.isPath)(pathOrBody, extensions); | ||
if (inline) { | ||
return readJSONBuffer(Buffer.from(pathOrBody, "utf-8"), options); | ||
} | ||
else { | ||
return readJSONPath(pathOrBody, options); | ||
} | ||
} | ||
else { | ||
throw new Error("must supply either a path or body"); | ||
} | ||
} | ||
io.readJSON = readJSON; | ||
function readParquet(pathOrBody, options) { | ||
if (Buffer.isBuffer(pathOrBody)) { | ||
return readParquetBuffer(pathOrBody, options); | ||
} | ||
if (typeof pathOrBody === "string") { | ||
const inline = !(0, utils_1.isPath)(pathOrBody, [".parquet"]); | ||
if (inline) { | ||
return readParquetBuffer(Buffer.from(pathOrBody, "utf-8"), options); | ||
} | ||
else { | ||
return readParquetPath(pathOrBody, options); | ||
} | ||
} | ||
else { | ||
throw new Error("must supply either a path or body"); | ||
} | ||
} | ||
io.readParquet = readParquet; | ||
function scanParquet(path, options) { | ||
return (0, dataframe_2.LazyDataFrame)(polars_internal_1.default.ldf.scanParquet({ path, ...options })); | ||
} | ||
io.scanParquet = scanParquet; | ||
function readIPC(pathOrBody, options) { | ||
if (Buffer.isBuffer(pathOrBody)) { | ||
return readIPCBuffer(pathOrBody, options); | ||
} | ||
if (typeof pathOrBody === "string") { | ||
const inline = !(0, utils_1.isPath)(pathOrBody, [".ipc"]); | ||
if (inline) { | ||
return readIPCBuffer(Buffer.from(pathOrBody, "utf-8"), options); | ||
} | ||
else { | ||
return readIPCPath(pathOrBody, options); | ||
} | ||
} | ||
else { | ||
throw new Error("must supply either a path or body"); | ||
} | ||
} | ||
io.readIPC = readIPC; | ||
function scanIPC(path, options) { | ||
return (0, dataframe_2.LazyDataFrame)(polars_internal_1.default.ldf.scanIPC({ path, ...options })); | ||
} | ||
io.scanIPC = scanIPC; | ||
function readCSVStream(stream, options) { | ||
let batchSize = options?.batchSize ?? 10000; | ||
let count = 0; | ||
let end = options?.endRows ?? Number.POSITIVE_INFINITY; | ||
return new Promise((resolve, reject) => { | ||
const s = stream.pipe(new LineBatcher({ batchSize })); | ||
const chunks = []; | ||
s.on("data", (chunk) => { | ||
// early abort if 'end rows' is specified | ||
if (count <= end) { | ||
chunks.push(chunk); | ||
} | ||
else { | ||
s.end(); | ||
} | ||
count += batchSize; | ||
}).on("end", () => { | ||
try { | ||
let buff = Buffer.concat(chunks); | ||
const df = readCSVBuffer(buff, options); | ||
resolve(df); | ||
} | ||
catch (err) { | ||
reject(err); | ||
} | ||
}); | ||
}); | ||
} | ||
io.readCSVStream = readCSVStream; | ||
function readJSONStream(stream, options) { | ||
let batchSize = options?.batchSize ?? 10000; | ||
return new Promise((resolve, reject) => { | ||
const chunks = []; | ||
stream | ||
.pipe(new LineBatcher({ batchSize })) | ||
.on("data", (chunk) => { | ||
try { | ||
const df = readJSONBuffer(chunk, options); | ||
chunks.push(df); | ||
} | ||
catch (err) { | ||
reject(err); | ||
} | ||
}) | ||
.on("end", () => { | ||
try { | ||
const df = (0, functions_1.concat)(chunks); | ||
resolve(df); | ||
} | ||
catch (err) { | ||
reject(err); | ||
} | ||
}); | ||
}); | ||
} | ||
io.readJSONStream = readJSONStream; | ||
})(io || (io = {})); | ||
// utility to read streams as lines. | ||
@@ -252,2 +107,144 @@ class LineBatcher extends stream_1.Stream.Transform { | ||
} | ||
module.exports = io; | ||
function readCSV(pathOrBody, options) { | ||
const extensions = [".tsv", ".csv"]; | ||
if (Buffer.isBuffer(pathOrBody)) { | ||
return readCSVBuffer(pathOrBody, options); | ||
} | ||
if (typeof pathOrBody === "string") { | ||
const inline = !(0, utils_1.isPath)(pathOrBody, extensions); | ||
if (inline) { | ||
return readCSVBuffer(Buffer.from(pathOrBody, "utf-8"), options); | ||
} | ||
else { | ||
return readCSVPath(pathOrBody, options); | ||
} | ||
} | ||
else { | ||
throw new Error("must supply either a path or body"); | ||
} | ||
} | ||
exports.readCSV = readCSV; | ||
function scanCSV(path, options) { | ||
options = { ...readCsvDefaultOptions, ...options }; | ||
return (0, dataframe_2.LazyDataFrame)(polars_internal_1.default.ldf.scanCSV({ path, ...options })); | ||
} | ||
exports.scanCSV = scanCSV; | ||
function readJSON(pathOrBody, options) { | ||
const extensions = [".ndjson", ".json", ".jsonl"]; | ||
if (Buffer.isBuffer(pathOrBody)) { | ||
return readJSONBuffer(pathOrBody, options); | ||
} | ||
if (typeof pathOrBody === "string") { | ||
const inline = !(0, utils_1.isPath)(pathOrBody, extensions); | ||
if (inline) { | ||
return readJSONBuffer(Buffer.from(pathOrBody, "utf-8"), options); | ||
} | ||
else { | ||
return readJSONPath(pathOrBody, options); | ||
} | ||
} | ||
else { | ||
throw new Error("must supply either a path or body"); | ||
} | ||
} | ||
exports.readJSON = readJSON; | ||
function readParquet(pathOrBody, options) { | ||
if (Buffer.isBuffer(pathOrBody)) { | ||
return readParquetBuffer(pathOrBody, options); | ||
} | ||
if (typeof pathOrBody === "string") { | ||
const inline = !(0, utils_1.isPath)(pathOrBody, [".parquet"]); | ||
if (inline) { | ||
return readParquetBuffer(Buffer.from(pathOrBody, "utf-8"), options); | ||
} | ||
else { | ||
return readParquetPath(pathOrBody, options); | ||
} | ||
} | ||
else { | ||
throw new Error("must supply either a path or body"); | ||
} | ||
} | ||
exports.readParquet = readParquet; | ||
function scanParquet(path, options) { | ||
return (0, dataframe_2.LazyDataFrame)(polars_internal_1.default.ldf.scanParquet({ path, ...options })); | ||
} | ||
exports.scanParquet = scanParquet; | ||
function readIPC(pathOrBody, options) { | ||
if (Buffer.isBuffer(pathOrBody)) { | ||
return readIPCBuffer(pathOrBody, options); | ||
} | ||
if (typeof pathOrBody === "string") { | ||
const inline = !(0, utils_1.isPath)(pathOrBody, [".ipc"]); | ||
if (inline) { | ||
return readIPCBuffer(Buffer.from(pathOrBody, "utf-8"), options); | ||
} | ||
else { | ||
return readIPCPath(pathOrBody, options); | ||
} | ||
} | ||
else { | ||
throw new Error("must supply either a path or body"); | ||
} | ||
} | ||
exports.readIPC = readIPC; | ||
function scanIPC(path, options) { | ||
return (0, dataframe_2.LazyDataFrame)(polars_internal_1.default.ldf.scanIPC({ path, ...options })); | ||
} | ||
exports.scanIPC = scanIPC; | ||
function readCSVStream(stream, options) { | ||
let batchSize = options?.batchSize ?? 10000; | ||
let count = 0; | ||
let end = options?.endRows ?? Number.POSITIVE_INFINITY; | ||
return new Promise((resolve, reject) => { | ||
const s = stream.pipe(new LineBatcher({ batchSize })); | ||
const chunks = []; | ||
s.on("data", (chunk) => { | ||
// early abort if 'end rows' is specified | ||
if (count <= end) { | ||
chunks.push(chunk); | ||
} | ||
else { | ||
s.end(); | ||
} | ||
count += batchSize; | ||
}).on("end", () => { | ||
try { | ||
let buff = Buffer.concat(chunks); | ||
const df = readCSVBuffer(buff, options); | ||
resolve(df); | ||
} | ||
catch (err) { | ||
reject(err); | ||
} | ||
}); | ||
}); | ||
} | ||
exports.readCSVStream = readCSVStream; | ||
function readJSONStream(stream, options) { | ||
let batchSize = options?.batchSize ?? 10000; | ||
return new Promise((resolve, reject) => { | ||
const chunks = []; | ||
stream | ||
.pipe(new LineBatcher({ batchSize })) | ||
.on("data", (chunk) => { | ||
try { | ||
const df = readJSONBuffer(chunk, options); | ||
chunks.push(df); | ||
} | ||
catch (err) { | ||
reject(err); | ||
} | ||
}) | ||
.on("end", () => { | ||
try { | ||
const df = (0, functions_1.concat)(chunks); | ||
resolve(df); | ||
} | ||
catch (err) { | ||
reject(err); | ||
} | ||
}); | ||
}); | ||
} | ||
exports.readJSONStream = readJSONStream; |
@@ -154,2 +154,4 @@ import { DataType } from "../datatypes"; | ||
* @param n The number of values to extend. | ||
* @deprecated | ||
* @see {@link extendConstant} | ||
*/ | ||
@@ -161,2 +163,12 @@ extend(value: any, n: number): Expr; | ||
}): Expr; | ||
/** | ||
* Extend the Series with given number of values. | ||
* @param value The value to extend the Series with. This value may be null to fill with nulls. | ||
* @param n The number of values to extend. | ||
*/ | ||
extendConstant(value: any, n: number): Expr; | ||
extendConstant(opt: { | ||
value: any; | ||
n: number; | ||
}): Expr; | ||
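A sketch of the replacement API in an expression context; `extendConstant` supersedes the deprecated `extend` overloads above:

```
const pl = require("nodejs-polars");

const df = pl.DataFrame({ a: [1, 2, 3] });
// Pad the column with two nulls (the value may be null to fill with nulls).
const out = df.select(pl.col("a").extendConstant(null, 2));
```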
/** Fill nan value with a fill value */ | ||
@@ -163,0 +175,0 @@ fillNan(other: any): Expr; |
@@ -127,6 +127,12 @@ "use strict"; | ||
if (n !== null && typeof n === "number") { | ||
return wrap("extend", { value: o, n }); | ||
return wrap("extendConstant", { value: o, n }); | ||
} | ||
return wrap("extend", o); | ||
return wrap("extendConstant", o); | ||
}, | ||
extendConstant(o, n) { | ||
if (n !== null && typeof n === "number") { | ||
return wrap("extendConstant", { value: o, n }); | ||
} | ||
return wrap("extendConstant", o); | ||
}, | ||
fillNan: wrapExprArg("fillNan", true), | ||
@@ -133,0 +139,0 @@ fillNull(fillValue) { |
@@ -15,2 +15,9 @@ import { Expr } from "../expr"; | ||
first(): Expr; | ||
/** | ||
* Join all string items in a sublist and place a separator between them. | ||
* This errors if the inner type of the list is not `Utf8`. | ||
* @param separator A string used to separate one element of the list from the next in the resulting string. | ||
* If omitted, the list elements are separated with a comma. | ||
*/ | ||
join(separator?: string): Expr; | ||
/** Get the last value of the sublists. */ | ||
@@ -17,0 +24,0 @@ last(): Expr; |
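A sketch of the new `join`, assuming the list namespace is exposed on expressions as `.lst`:

```
const pl = require("nodejs-polars");

const df = pl.DataFrame({ words: [["hello", "world"], ["polars"]] });
// Join each sublist with "-"; the inner type must be Utf8.
const out = df.select(pl.col("words").lst.join("-"));
```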
@@ -20,2 +20,5 @@ "use strict"; | ||
}, | ||
join(separator = ",") { | ||
return wrap("join", { separator }); | ||
}, | ||
last() { | ||
@@ -22,0 +25,0 @@ return wrap("get", { index: -1 }); |
@@ -162,6 +162,14 @@ import { DataType } from "../../datatypes"; | ||
/** | ||
* Parse a Series of dtype Utf8 to a Date/Datetime Series. | ||
* @param datatype Date or Datetime. | ||
* @param fmt formatting syntax. [Read more](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html) | ||
*/ | ||
/** | ||
* Split a string into substrings using the specified separator and return them as a Series. | ||
* @param by A string that identifies the character or characters to use in separating the string. | ||
* @param inclusive Include the split character/string in the results | ||
*/ | ||
split(by: string, options?: { | ||
inclusive?: boolean; | ||
} | boolean): Expr; | ||
/** | ||
* Parse a Series of dtype Utf8 to a Date/Datetime Series. | ||
* @param datatype Date or Datetime. | ||
* @param fmt formatting syntax. [Read more](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html) | ||
*/ | ||
strftime(datatype: DataType.Date, fmt?: string): Expr; | ||
@@ -168,0 +176,0 @@ strftime(datatype: DataType.Datetime, fmt?: string): Expr; |
@@ -72,2 +72,6 @@ "use strict"; | ||
}, | ||
split(by, options) { | ||
const inclusive = typeof options === "boolean" ? options : options?.inclusive; | ||
return wrap("split", { by, inclusive }); | ||
}, | ||
strftime(dtype, fmt) { | ||
@@ -74,0 +78,0 @@ if (dtype === datatypes_1.DataType.Date) { |
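A sketch of `split` through the string namespace; per the implementation above, the options argument may be a bare boolean or `{ inclusive }`:

```
const pl = require("nodejs-polars");

const df = pl.DataFrame({ s: ["a,b,c", "x,y"] });
// Produces a List<Utf8> column; pass true (or { inclusive: true }) to keep the separator.
const out = df.select(pl.col("s").str.split(",").as("parts"));
```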
@@ -5,2 +5,9 @@ import { JsSeries, Series } from "./series"; | ||
first(): Series<T>; | ||
/** | ||
* Join all string items in a sublist and place a separator between them. | ||
* This errors if the inner type of the list is not `Utf8`. | ||
* @param separator A string used to separate one element of the list from the next in the resulting string. | ||
* If omitted, the list elements are separated with a comma. | ||
*/ | ||
join(separator?: string): Series<string>; | ||
last(): Series<T>; | ||
@@ -7,0 +14,0 @@ /** Get the length of the arrays as UInt32. */ |
@@ -19,2 +19,3 @@ "use strict"; | ||
first: callExpr("first"), | ||
join: callExpr("join"), | ||
last: callExpr("last"), | ||
@@ -21,0 +22,0 @@ lengths: callExpr("lengths"), |
@@ -210,6 +210,8 @@ import { DataType, DtypeToPrimitive, Optional } from "../datatypes"; | ||
/** | ||
* Extend the Series with given number of values. | ||
* @param value The value to extend the Series with. This value may be null to fill with nulls. | ||
* @param n The number of values to extend. | ||
*/ | ||
* Extend the Series with given number of values. | ||
* @param value The value to extend the Series with. This value may be null to fill with nulls. | ||
* @param n The number of values to extend. | ||
* @deprecated | ||
* @see {@link extendConstant} | ||
*/ | ||
extend(value: any, n: number): Series<T>; | ||
@@ -221,2 +223,12 @@ extend(opt: { | ||
/** | ||
* Extend the Series with given number of values. | ||
* @param value The value to extend the Series with. This value may be null to fill with nulls. | ||
* @param n The number of values to extend. | ||
*/ | ||
extendConstant(value: any, n: number): Series<T>; | ||
extendConstant(opt: { | ||
value: any; | ||
n: number; | ||
}): Series<T>; | ||
/** | ||
* __Fill null values with a filling strategy.__ | ||
@@ -223,0 +235,0 @@ * ___ |
@@ -241,6 +241,9 @@ "use strict"; | ||
extend(o, n) { | ||
return this.extendConstant(o, n); | ||
}, | ||
extendConstant(o, n) { | ||
if (n !== null && typeof n === "number") { | ||
return wrap("extend", { value: o, n }); | ||
return wrap("extend_constant", { value: o, n }); | ||
} | ||
return wrap("extend", o); | ||
return wrap("extend_constant", o); | ||
}, | ||
@@ -247,0 +250,0 @@ fillNull(strategy) { |
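On the Series side the deprecated name now simply delegates to the new one, so both spellings keep working during migration; a sketch:

```
const pl = require("nodejs-polars");

const s = pl.Series("a", [1, 2, 3]);
const padded = s.extend(null, 2);          // deprecated, still functional
const padded2 = s.extendConstant(null, 2); // preferred spelling
```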
@@ -153,2 +153,11 @@ import { DataType } from "../datatypes"; | ||
/** | ||
* Split a string into substrings using the specified separator. | ||
* The return type will be of type `List<Utf8>`. | ||
* @param separator A string that identifies the character or characters to use in separating the string. | ||
* @param inclusive Include the split character/string in the results | ||
*/ | ||
split(separator: string, options?: { | ||
inclusive?: boolean; | ||
} | boolean): Series<Series<string>>; | ||
/** | ||
* Parse a Series of dtype Utf8 to a Date/Datetime Series. | ||
@@ -155,0 +164,0 @@ * @param datatype Date or Datetime. |
@@ -80,2 +80,13 @@ "use strict"; | ||
}, | ||
split(by, options) { | ||
const inclusive = typeof options === "boolean" ? options : options?.inclusive; | ||
const s = (0, series_1.seriesWrapper)(_s); | ||
return s | ||
.toFrame() | ||
.select((0, functions_1.col)(s.name) | ||
.str | ||
.split(by, inclusive) | ||
.as(s.name)) | ||
.getColumn(s.name); | ||
}, | ||
strftime(dtype, fmt) { | ||
@@ -82,0 +93,0 @@ if (dtype === datatypes_1.DataType.Date) { |
{ | ||
"name": "nodejs-polars", | ||
"version": "0.2.1", | ||
"version": "0.3.0", | ||
"repository": "https://github.com/pola-rs/polars.git", | ||
@@ -89,14 +89,14 @@ "license": "SEE LICENSE IN LICENSE", | ||
"optionalDependencies": { | ||
"nodejs-polars-win32-x64-msvc": "0.2.1", | ||
"nodejs-polars-darwin-x64": "0.2.1", | ||
"nodejs-polars-linux-x64-gnu": "0.2.1", | ||
"nodejs-polars-win32-ia32-msvc": "0.2.1", | ||
"nodejs-polars-linux-arm64-gnu": "0.2.1", | ||
"nodejs-polars-linux-arm-gnueabihf": "0.2.1", | ||
"nodejs-polars-darwin-arm64": "0.2.1", | ||
"nodejs-polars-android-arm64": "0.2.1", | ||
"nodejs-polars-linux-x64-musl": "0.2.1", | ||
"nodejs-polars-linux-arm64-musl": "0.2.1", | ||
"nodejs-polars-win32-arm64-msvc": "0.2.1" | ||
"nodejs-polars-win32-x64-msvc": "0.3.0", | ||
"nodejs-polars-darwin-x64": "0.3.0", | ||
"nodejs-polars-linux-x64-gnu": "0.3.0", | ||
"nodejs-polars-win32-ia32-msvc": "0.3.0", | ||
"nodejs-polars-linux-arm64-gnu": "0.3.0", | ||
"nodejs-polars-linux-arm-gnueabihf": "0.3.0", | ||
"nodejs-polars-darwin-arm64": "0.3.0", | ||
"nodejs-polars-android-arm64": "0.3.0", | ||
"nodejs-polars-linux-x64-musl": "0.3.0", | ||
"nodejs-polars-linux-arm64-musl": "0.3.0", | ||
"nodejs-polars-win32-arm64-msvc": "0.3.0" | ||
} | ||
} |