nodejs-polars
Advanced tools
Comparing version 0.8.4 to 0.9.0
@@ -224,2 +224,23 @@ "use strict"; | ||
}, | ||
upsample(opts, every, offset, by, maintainOrder) { | ||
let timeColumn; | ||
if (typeof opts === "string") { | ||
timeColumn = opts; | ||
} | ||
else { | ||
timeColumn = opts.timeColumn; | ||
by = opts.by; | ||
offset = opts.offset; | ||
every = opts.every; | ||
maintainOrder = opts.maintainOrder ?? false; | ||
} | ||
if (typeof by === "string") { | ||
by = [by]; | ||
} | ||
else { | ||
by = by ?? []; | ||
} | ||
offset = offset ?? "0ns"; | ||
return (0, exports._DataFrame)(_df.upsample(by, timeColumn, every, offset, maintainOrder)); | ||
}, | ||
hashRows(obj = 0n, k1 = 1n, k2 = 2n, k3 = 3n) { | ||
@@ -281,3 +302,3 @@ if (typeof obj === "number" || typeof obj === "bigint") { | ||
} | ||
return wrap("max"); | ||
return this.lazy().max().collectSync(); | ||
}, | ||
@@ -288,6 +309,6 @@ mean(axis = 0, nullStrategy = "ignore") { | ||
} | ||
return wrap("mean"); | ||
return this.lazy().mean().collectSync(); | ||
}, | ||
median() { | ||
return wrap("median"); | ||
return this.lazy().median().collectSync(); | ||
}, | ||
@@ -301,3 +322,3 @@ melt(ids, values) { | ||
} | ||
return wrap("min"); | ||
return this.lazy().min().collectSync(); | ||
}, | ||
@@ -310,3 +331,3 @@ nChunks() { | ||
}, | ||
partitionBy(by, strict = false, includeKey, mapFn = (df) => df) { | ||
partitionBy(by, strict = false, includeKey = true, mapFn = (df) => df) { | ||
by = Array.isArray(by) ? by : [by]; | ||
@@ -346,4 +367,4 @@ return _df | ||
}, | ||
quantile(quantile, interpolation = "nearest") { | ||
return wrap("quantile", quantile, interpolation); | ||
quantile(quantile) { | ||
return this.lazy().quantile(quantile).collectSync(); | ||
}, | ||
@@ -432,3 +453,3 @@ rechunk() { | ||
std() { | ||
return wrap("std"); | ||
return this.lazy().std().collectSync(); | ||
}, | ||
@@ -439,3 +460,3 @@ sum(axis = 0, nullStrategy = "ignore") { | ||
} | ||
return wrap("sum"); | ||
return this.lazy().sum().collectSync(); | ||
}, | ||
@@ -612,3 +633,3 @@ tail: (length = 5) => wrap("tail", length), | ||
var() { | ||
return wrap("var"); | ||
return this.lazy().var().collectSync(); | ||
}, | ||
@@ -615,0 +636,0 @@ map: (fn) => map((0, exports._DataFrame)(_df), fn), |
@@ -62,2 +62,3 @@ import { Field } from "./field"; | ||
static get Utf8(): DataType; | ||
static get String(): DataType; | ||
toString(): string; | ||
@@ -101,2 +102,4 @@ toJSON(): { | ||
} | ||
declare class _String extends DataType { | ||
} | ||
declare class _Categorical extends DataType { | ||
@@ -178,2 +181,4 @@ } | ||
type Utf8 = _Utf8; | ||
/** String */ | |
type String = _String; | ||
/** Categorical */ | ||
@@ -180,0 +185,0 @@ type Categorical = _Categorical; |
@@ -100,2 +100,5 @@ "use strict"; | ||
} | ||
static get String() { | ||
return new _String(); | ||
} | ||
toString() { | ||
@@ -160,2 +163,4 @@ if (this.inner) { | ||
} | ||
class _String extends DataType { | ||
} | ||
class _Categorical extends DataType { | ||
@@ -162,0 +167,0 @@ } |
@@ -31,2 +31,3 @@ import { DataType, TimeUnit } from "./datatype"; | ||
Utf8: string; | ||
String: string; | ||
List: string; | ||
@@ -33,0 +34,0 @@ Date: string; |
@@ -29,2 +29,3 @@ "use strict"; | ||
Utf8: "Str", | ||
String: "Str", | ||
List: "List", | ||
@@ -81,2 +82,5 @@ Date: "Date", | ||
}, | ||
String(name, values, strict) { | ||
return polars_internal_1.default.JsSeries.newOptStr(name, values, strict); | ||
}, | ||
Categorical(name, values, strict) { | ||
@@ -83,0 +87,0 @@ return polars_internal_1.default.JsSeries.newOptStr(name, values, strict); |
@@ -43,2 +43,3 @@ import * as series from "./series"; | ||
export import Utf8 = DataType.Utf8; | ||
export import String = DataType.String; | ||
export import List = DataType.List; | ||
@@ -45,0 +46,0 @@ export import Date = DataType.Date; |
@@ -75,2 +75,4 @@ "use strict"; | ||
pl.Utf8 = datatypes_1.DataType.Utf8; | ||
// biome-ignore lint/suspicious/noShadowRestrictedNames: pl.String | ||
pl.String = datatypes_1.DataType.String; | ||
pl.List = datatypes_1.DataType.List; | ||
@@ -77,0 +79,0 @@ // biome-ignore lint/suspicious/noShadowRestrictedNames: pl.Date |
@@ -10,3 +10,2 @@ /// <reference types="node" /> | ||
nRows: number; | ||
batchSize: number; | ||
hasHeader: boolean; | ||
@@ -18,2 +17,3 @@ ignoreErrors: boolean; | ||
sep: string; | ||
schema: Record<string, DataType>; | ||
columns: string[]; | ||
@@ -77,2 +77,3 @@ rechunk: boolean; | ||
* @param options.dtype -Overwrite the dtypes during inference. | ||
* @param options.schema -Set the CSV file's schema. This only accepts datatypes that are implemented in the csv parser and expects a complete Schema. | ||
* @param options.lowMemory - Reduce memory usage at the expense of performance. | |
@@ -79,0 +80,0 @@ * @param options.commentChar - character that indicates the start of a comment line, for instance '#'. |
@@ -6,3 +6,3 @@ import { DataFrame } from "../dataframe"; | ||
import { Deserialize, GroupByOps, Serialize } from "../shared_traits"; | ||
import { LazyOptions, LazyJoinOptions } from "../types"; | ||
import { LazyOptions, LazyJoinOptions, SinkCsvOptions, SinkParquetOptions } from "../types"; | ||
import { Series } from "../series"; | ||
@@ -434,2 +434,102 @@ declare const inspect: unique symbol; | ||
withRowCount(): any; | ||
/*** | ||
* | ||
* Evaluate the query in streaming mode and write to a CSV file. | ||
.. warning:: | ||
Streaming mode is considered **unstable**. It may be changed | ||
at any point without it being considered a breaking change. | ||
This allows streaming results that are larger than RAM to be written to disk. | ||
Parameters | ||
---------- | ||
@param path - File path to which the file should be written. | ||
@param includeBom - Whether to include UTF-8 BOM in the CSV output. | ||
@param includeHeader - Whether to include header in the CSV output. | ||
@param separator - Separate CSV fields with this symbol. | ||
@param lineTerminator - String used to end each row. | ||
@param quoteChar - Byte to use as quoting character. | ||
@param batchSize - Number of rows that will be processed per thread. Default - 1024 | ||
@param datetimeFormat - A format string, with the specifiers defined by the | ||
`chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_ | ||
Rust crate. If no format specified, the default fractional-second | ||
precision is inferred from the maximum timeunit found in the frame's | ||
Datetime cols (if any). | ||
@param dateFormat - A format string, with the specifiers defined by the | ||
`chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_ | ||
Rust crate. | ||
@param timeFormat - A format string, with the specifiers defined by the | |
`chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_ | ||
Rust crate. | ||
@param floatPrecision - Number of decimal places to write, applied to both `Float32` and `Float64` datatypes. | ||
@param nullValue - A string representing null values (defaulting to the empty string). | ||
@param quoteStyle - Determines the quoting strategy used. One of {'necessary', 'always', 'non_numeric', 'never'}: | |
- necessary (default): This puts quotes around fields only when necessary. | ||
They are necessary when fields contain a quote, | ||
delimiter or record terminator. | ||
Quotes are also necessary when writing an empty record | ||
(which is indistinguishable from a record with one empty field). | ||
This is the default. | ||
- always: This puts quotes around every field. Always. | ||
- never: This never puts quotes around fields, even if that results in | ||
invalid CSV data (e.g.: by not quoting strings containing the | ||
separator). | ||
- non_numeric: This puts quotes around all fields that are non-numeric. | ||
Namely, when writing a field that does not parse as a valid float | ||
or integer, then quotes will be used even if they aren't strictly | |
necessary. | ||
@param maintainOrder - Maintain the order in which data is processed. | ||
Setting this to `False` will be slightly faster. | ||
Examples | ||
-------- | ||
>>> const lf = pl.scanCsv("/path/to/my_larger_than_ram_file.csv") | ||
>>> lf.sinkCSV("out.csv") | |
*/ | ||
sinkCSV(path: string, options?: SinkCsvOptions): void; | ||
/*** | ||
* | ||
* Evaluate the query in streaming mode and write to a Parquet file. | ||
.. warning:: | ||
Streaming mode is considered **unstable**. It may be changed | ||
at any point without it being considered a breaking change. | ||
This allows streaming results that are larger than RAM to be written to disk. | ||
Parameters | ||
---------- | ||
@param path - File path to which the file should be written. | ||
@param compression : {'lz4', 'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'zstd'} | ||
Choose "zstd" for good compression performance. (default) | ||
Choose "lz4" for fast compression/decompression. | ||
Choose "snappy" for more backwards compatibility guarantees | ||
when you deal with older parquet readers. | ||
@param compressionLevel - The level of compression to use. Higher compression means smaller files on disk. | ||
- "gzip" : min-level: 0, max-level: 10. | ||
- "brotli" : min-level: 0, max-level: 11. | ||
- "zstd" : min-level: 1, max-level: 22. | ||
@param statistics - Write statistics to the parquet headers. This requires extra compute. Default - false | ||
@param rowGroupSize - Size of the row groups in number of rows. | ||
If None (default), the chunks of the `DataFrame` are | ||
used. Writing in smaller chunks may reduce memory pressure and improve | ||
writing speeds. | ||
@param dataPagesizeLimit - Size limit of individual data pages. | ||
If not set defaults to 1024 * 1024 bytes | ||
@param maintainOrder - Maintain the order in which data is processed. Default -> true | ||
Setting this to `False` will be slightly faster. | ||
@param typeCoercion - Do type coercion optimization. Default -> true | ||
@param predicatePushdown - Do predicate pushdown optimization. Default -> true | ||
@param projectionPushdown - Do projection pushdown optimization. Default -> true | ||
@param simplifyExpression - Run simplify expressions optimization. Default -> true | ||
@param slicePushdown - Slice pushdown optimization. Default -> true | ||
@param noOptimization - Turn off (certain) optimizations. Default -> false | ||
Examples | ||
-------- | ||
>>> const lf = pl.scanCsv("/path/to/my_larger_than_ram_file.csv") | |
>>> lf.sinkParquet("out.parquet") | |
*/ | ||
sinkParquet(path: string, options?: SinkParquetOptions): void; | ||
} | ||
@@ -436,0 +536,0 @@ /** @ignore */ |
@@ -345,2 +345,10 @@ "use strict"; | ||
}, | ||
sinkCSV(path, options = {}) { | ||
options.maintainOrder = options.maintainOrder ?? false; | ||
_ldf.sinkCsv(path, options); | ||
}, | ||
sinkParquet(path, options = {}) { | ||
options.compression = options.compression ?? "zstd"; | ||
_ldf.sinkParquet(path, options); | ||
}, | ||
}; | ||
@@ -347,0 +355,0 @@ }; |
@@ -49,6 +49,6 @@ import * as dt from "./datetime"; | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* > const df = pl.DataFrame({ | ||
* ... "a": [1, 2, 3], | ||
* ... "b": ["a", "b", None], | ||
* ... }) | ||
* ... }); | ||
* > df | ||
@@ -136,8 +136,8 @@ * shape: (3, 2) | ||
* ``` | ||
* >df = pl.DataFrame({ | ||
* > const df = pl.DataFrame({ | ||
* ... "a": [1, 2, 3], | ||
* ... "b": ["a", "b", None], | ||
* ... "c": [None, 2, 1], | ||
* ...}) | ||
* >df | ||
* ...}); | ||
* > df | ||
* shape: (3, 3) | ||
@@ -155,5 +155,5 @@ * ╭─────┬──────┬──────╮ | ||
* ╰─────┴──────┴──────╯ | ||
* >df.select( | ||
* > df.select( | ||
* ... pl.col("*").exclude("b"), | ||
* ... ) | ||
* ... ); | ||
* shape: (3, 2) | ||
@@ -249,9 +249,9 @@ * ╭─────┬──────╮ | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* > const df = pl.DataFrame({ | ||
* ... "sets": [[1, 2, 3], [1, 2], [9, 10]], | ||
* ... "optional_members": [1, 2, 3] | ||
* ... }) | ||
* ... }); | ||
* > df.select( | ||
* ... pl.col("optional_members").isIn("sets").alias("contains") | ||
* ... ) | ||
* ... ); | ||
* shape: (3, 1) | ||
@@ -291,6 +291,6 @@ * ┌──────────┐ | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* > const df = pl.DataFrame({ | ||
* ... "a": [1, 2, 3], | ||
* ... "b": ["a", "b", None], | ||
* ... }) | ||
* ... }); | ||
* | ||
@@ -300,3 +300,3 @@ * > df | ||
* ... .agg(pl.col("b").list()) | ||
* ... .sort({by:"a"}) | ||
* ... .sort({by:"a"}); | ||
* | ||
@@ -376,9 +376,9 @@ * shape: (3, 2) | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* > const df = pl.DataFrame({ | ||
* ... "groups": [1, 1, 2, 2, 1, 2, 3, 3, 1], | ||
* ... "values": [1, 2, 3, 4, 5, 6, 7, 8, 8], | ||
* ... }) | ||
* ... }); | ||
* > df.select( | ||
* ... pl.col("groups").sum().over("groups") | ||
* ... ) | ||
* ... ); | ||
* ╭────────┬────────╮ | ||
@@ -421,3 +421,3 @@ * │ groups ┆ values │ | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* > const df = pl.DataFrame({ | ||
* ... "A": [1, 2, 3, 4, 5], | ||
@@ -427,3 +427,3 @@ * ... "fruits": ["banana", "banana", "apple", "apple", "banana"], | ||
* ... "cars": ["beetle", "audi", "beetle", "beetle", "beetle"], | ||
* ... }) | ||
* ... }); | ||
* shape: (5, 4) | ||
@@ -446,3 +446,3 @@ * ╭─────┬──────────┬─────┬──────────╮ | ||
* > df.select( | ||
* ... pl.all().reverse().prefix("reverse_"), | ||
* ... pl.col("*").reverse().prefix("reverse_"), | ||
* ... ) | ||
@@ -544,4 +544,3 @@ * shape: (5, 8) | ||
---------- | ||
@param by | ||
The column(s) used for sorting. | ||
@param by The column(s) used for sorting. | ||
@param reverse | ||
@@ -580,3 +579,3 @@ false -> order from small to large. | ||
/** Take every nth value in the Series and return as a new Series. */ | ||
gatherEvery(n: number): Expr; | ||
gatherEvery(n: number, offset?: number): Expr; | ||
/** | ||
@@ -583,0 +582,0 @@ * Get the unique values of this expression; |
@@ -37,2 +37,3 @@ "use strict"; | ||
const series_1 = require("../../series"); | ||
const types_1 = require("util/types"); | ||
const _Expr = (_expr) => { | ||
@@ -388,3 +389,3 @@ const unwrap = (method, ...args) => { | ||
rollingMedian: rolling("rollingMedian"), | ||
rollingQuantile(val, interpolation, windowSize, weights, minPeriods, center, by, closedWindow) { | ||
rollingQuantile(val, interpolation, windowSize, weights, minPeriods, center, by, closedWindow, warnIfUnsorted) { | ||
if (typeof val === "number") { | ||
@@ -403,3 +404,3 @@ return wrap("rollingQuantile", { | ||
} | ||
return wrap("rollingQuantile", val.quantile, val.interpolation ?? "nearest", `${windowSize}i`, val?.["weights"] ?? weights ?? null, val?.["minPeriods"] ?? minPeriods ?? windowSize, val?.["center"] ?? center ?? false, val?.["by"] ?? by, val?.["closedWindow"] ?? closedWindow ?? "left"); | ||
return wrap("rollingQuantile", val.quantile, val.interpolation ?? "nearest", `${windowSize}i`, val?.["weights"] ?? weights ?? null, val?.["minPeriods"] ?? minPeriods ?? windowSize, val?.["center"] ?? center ?? false, val?.["by"] ?? by, val?.["closedWindow"] ?? closedWindow ?? "left", val?.["warnIfUnsorted"] ?? warnIfUnsorted ?? true); | ||
}, | ||
@@ -480,4 +481,4 @@ rollingSkew(val, bias = true) { | ||
}, | ||
gatherEvery(n) { | ||
return (0, exports._Expr)(_expr.gatherEvery(n)); | ||
gatherEvery(n, offset = 0) { | ||
return (0, exports._Expr)(_expr.gatherEvery(n, offset)); | ||
}, | ||
@@ -540,2 +541,5 @@ unique(opt) { | ||
const exprToLitOrExpr = (expr, stringToLit = true) => { | ||
if ((0, types_1.isRegExp)(expr)) { | ||
return (0, exports._Expr)(polars_internal_1.default.lit((0, utils_1.regexToString)(expr))); | ||
} | ||
if (typeof expr === "string" && !stringToLit) { | ||
@@ -542,0 +546,0 @@ return (0, exports._Expr)(polars_internal_1.default.col(expr)); |
@@ -66,4 +66,13 @@ "use strict"; | ||
}, | ||
join(separator = ",") { | ||
return wrap("listJoin", (0, expr_1.exprToLitOrExpr)(separator)._expr); | ||
join(options) { | ||
if (typeof options === "string") { | ||
options = { separator: options }; | ||
} | ||
options = options ?? {}; | ||
let separator = options?.separator ?? ","; | ||
const ignoreNulls = options?.ignoreNulls ?? false; | ||
if (!expr_1.Expr.isExpr(separator)) { | ||
separator = polars_internal_1.default.lit(separator); | ||
} | ||
return wrap("listJoin", separator, ignoreNulls); | ||
}, | ||
@@ -70,0 +79,0 @@ last() { |
@@ -110,3 +110,3 @@ import { StringFunctions } from "../../shared_traits"; | ||
*/ | ||
extract(pat: string | RegExp, groupIndex: number): Expr; | ||
extract(pat: any, groupIndex: number): Expr; | ||
/** | ||
@@ -117,2 +117,3 @@ * Parse string values as JSON. | ||
* @returns DF with struct | ||
* @deprecated @since 0.8.4 @use {@link jsonDecode} | ||
* @example | ||
@@ -138,2 +139,26 @@ | ||
/** | ||
* Parse string values as JSON. | ||
* Throws an error if an invalid JSON string is encountered. | |
* @params Not implemented ATM | ||
* @returns DF with struct | ||
* @example | ||
* >>> df = pl.DataFrame( {json: ['{"a":1, "b": true}', null, '{"a":2, "b": false}']} ) | ||
* >>> df.select(pl.col("json").str.jsonDecode()) | ||
* shape: (3, 1) | ||
* ┌─────────────┐ | ||
* │ json │ | ||
* │ --- │ | ||
* │ struct[2] │ | ||
* ╞═════════════╡ | ||
* │ {1,true} │ | ||
* │ {null,null} │ | ||
* │ {2,false} │ | ||
* └─────────────┘ | ||
* See Also | ||
* ---------- | ||
* jsonPathMatch : Extract the first match of json string with provided JSONPath expression. | ||
*/ | ||
jsonDecode(dtype?: DataType, inferSchemaLength?: number): Expr; | ||
/** | ||
* Extract the first match of json string with provided JSONPath expression. | ||
@@ -249,3 +274,3 @@ * Throw errors if encounter invalid json strings. | ||
*/ | ||
zFill(length: number): Expr; | ||
zFill(length: number | Expr): Expr; | ||
/** | ||
@@ -289,3 +314,3 @@ * Add a trailing fillChar to a string until string length is reached. | ||
*/ | ||
slice(start: number, length?: number): Expr; | ||
slice(start: number | Expr, length?: number | Expr): Expr; | ||
/** | ||
@@ -292,0 +317,0 @@ * Split a string into substrings using the specified separator and return them as a Series. |
@@ -7,2 +7,3 @@ "use strict"; | ||
const expr_1 = require("../expr"); | ||
const functions_1 = require("../functions"); | ||
const ExprStringFunctions = (_expr) => { | ||
@@ -46,7 +47,10 @@ const wrap = (method, ...args) => { | ||
extract(pat, groupIndex) { | ||
return wrap("strExtract", (0, utils_1.regexToString)(pat), groupIndex); | ||
return wrap("strExtract", (0, expr_1.exprToLitOrExpr)(pat, true)._expr, groupIndex); | ||
}, | ||
jsonExtract(dtype, inferSchemaLength) { | ||
return wrap("strJsonExtract", dtype, inferSchemaLength); | ||
return wrap("strJsonDecode", dtype, inferSchemaLength); | ||
}, | ||
jsonDecode(dtype, inferSchemaLength) { | ||
return wrap("strJsonDecode", dtype, inferSchemaLength); | ||
}, | ||
jsonPathMatch(pat) { | ||
@@ -74,3 +78,6 @@ return wrap("strJsonPathMatch", pat); | ||
zFill(length) { | ||
return wrap("strZFill", length); | ||
if (!expr_1.Expr.isExpr(length)) { | ||
length = (0, functions_1.lit)(length)._expr; | ||
} | ||
return wrap("zfill", length); | ||
}, | ||
@@ -81,2 +88,8 @@ padEnd(length, fillChar) { | ||
slice(start, length) { | ||
if (!expr_1.Expr.isExpr(start)) { | ||
start = (0, functions_1.lit)(start)._expr; | ||
} | ||
if (!expr_1.Expr.isExpr(length)) { | ||
length = (0, functions_1.lit)(length)._expr; | ||
} | ||
return wrap("strSlice", start, length); | ||
@@ -83,0 +96,0 @@ }, |
@@ -124,6 +124,7 @@ import { Expr } from "./expr"; | ||
step: number; | ||
dtype: DataType; | ||
eager?: boolean; | ||
}): any; | ||
export declare function intRange(low: any, high?: any, step?: number, eager?: true): Series; | ||
export declare function intRange(low: any, high?: any, step?: number, eager?: false): Expr; | ||
export declare function intRange(low: any, high?: any, step?: number, dtype?: DataType, eager?: true): Series; | ||
export declare function intRange(low: any, high?: any, step?: number, dtype?: DataType, eager?: false): Expr; | ||
/*** | ||
@@ -142,4 +143,4 @@ * Generate a range of integers for each row of the input columns. | ||
*/ | ||
export declare function intRanges(start: any, end: any, step?: number, eager?: false): Expr; | ||
export declare function intRanges(start: any, end: any, step?: number, eager?: true): Series; | ||
export declare function intRanges(start: any, end: any, step?: number, dtype?: DataType, eager?: false): Expr; | ||
export declare function intRanges(start: any, end: any, step?: number, dtype?: DataType, eager?: true): Series; | ||
/** Alias for `pl.col("*")` */ | ||
@@ -184,4 +185,5 @@ export declare function all(): Expr; | ||
sep: string; | ||
ignoreNulls?: boolean; | ||
}): any; | ||
export declare function concatString(exprs: ExprOrString[], sep?: string): any; | ||
export declare function concatString(exprs: ExprOrString[], sep?: string, ignoreNulls?: boolean): any; | ||
/** Count the number of values in this column. */ | ||
@@ -188,0 +190,0 @@ export declare function count(column: string): Expr; |
@@ -8,2 +8,3 @@ "use strict"; | ||
const expr_1 = require("./expr"); | ||
const datatypes_1 = require("../datatypes"); | ||
const series_1 = require("../series"); | ||
@@ -133,5 +134,5 @@ const dataframe_1 = require("../dataframe"); | ||
exports.lit = lit; | ||
function intRange(opts, high, step = 1, eager) { | ||
function intRange(opts, high, step = 1, dtype = datatypes_1.DataType.Int64, eager) { | ||
if (typeof opts?.low === "number") { | ||
return intRange(opts.low, opts.high, opts.step, opts.eager); | ||
return intRange(opts.low, opts.high, opts.step, opts.dtype, opts.eager); | ||
} | ||
@@ -146,15 +147,16 @@ const low = (0, expr_1.exprToLitOrExpr)(opts, false); | ||
} | ||
return (0, expr_1._Expr)(polars_internal_1.default.intRange(low, high, step, eager)); | ||
return (0, expr_1._Expr)(polars_internal_1.default.intRange(low, high, step, dtype)); | ||
} | ||
exports.intRange = intRange; | ||
function intRanges(start, end, step = 1, eager) { | ||
function intRanges(start, end, step = 1, dtype = datatypes_1.DataType.Int64, eager) { | ||
start = (0, expr_1.exprToLitOrExpr)(start, false); | ||
end = (0, expr_1.exprToLitOrExpr)(end, false); | ||
step = (0, expr_1.exprToLitOrExpr)(step, false); | ||
if (eager) { | ||
const df = (0, dataframe_1.DataFrame)({ a: [1] }); | ||
return df | ||
.select(intRanges(start, end, step).alias("intRanges")) | ||
.select(intRanges(start, end, step, dtype).alias("intRanges")) | ||
.getColumn("intRanges"); | ||
} | ||
return (0, expr_1._Expr)(polars_internal_1.default.intRanges(start, end, step, eager)); | ||
return (0, expr_1._Expr)(polars_internal_1.default.intRanges(start, end, step, dtype)); | ||
} | ||
@@ -206,8 +208,8 @@ exports.intRanges = intRanges; | ||
exports.concatList = concatList; | ||
function concatString(opts, sep = ",") { | ||
function concatString(opts, sep = ",", ignoreNulls = true) { | ||
if (opts?.exprs) { | ||
return concatString(opts.exprs, opts.sep); | ||
return concatString(opts.exprs, opts.sep, opts.ignoreNulls); | ||
} | ||
const items = (0, utils_1.selectionToExprList)(opts, false); | ||
return expr_1.Expr(polars_internal_1.default.concatStr(items, sep)); | ||
return expr_1.Expr(polars_internal_1.default.concatStr(items, sep, ignoreNulls)); | ||
} | ||
@@ -214,0 +216,0 @@ exports.concatString = concatString; |
@@ -455,5 +455,11 @@ import { DataType, Optional } from "../datatypes"; | ||
* Checks if this Series datatype is a Utf8. | ||
* @deprecated *since 0.8.4* | ||
* @see Use `Series.dtype.equals(pl.String)` instead. | ||
*/ | ||
isUtf8(): boolean; | ||
/** | ||
* Checks if this Series datatype is a String. | ||
*/ | ||
isString(): boolean; | ||
/** | ||
* __Compute the kurtosis (Fisher or Pearson) of a dataset.__ | ||
@@ -729,6 +735,13 @@ * | ||
* @param value value to replace masked values with | ||
* @deprecated @since 0.8.4 @use {@link scatter} | ||
*/ | ||
set(filter: Series, value: any): Series; | ||
setAtIdx(indices: number[] | Series, value: any): void; | ||
/** | ||
* __Set masked values__ | ||
* @param filter Boolean mask | ||
* @param value value to replace masked values with | ||
*/ | ||
set(filter: Series, value: any): Series; | ||
scatter(indices: number[] | Series, value: any): void; | ||
/** | ||
* __Shift the values by a given period__ | ||
@@ -801,3 +814,4 @@ * | ||
* __Sort this Series.__ | ||
* @param reverse - Reverse sort | ||
* @param descending - Sort in descending order. | ||
* @param nullsLast - Place nulls at the end. | ||
* @example | ||
@@ -815,3 +829,3 @@ * ``` | ||
* ] | ||
* s.sort(true) | ||
* s.sort({descending: true}) | ||
* shape: (4,) | ||
@@ -828,5 +842,5 @@ * Series: 'a' [i64] | ||
sort(): Series; | ||
sort(reverse?: boolean): Series; | ||
sort(options: { | ||
reverse: boolean; | ||
descending?: boolean; | ||
nullsLast?: boolean; | ||
}): Series; | ||
@@ -864,3 +878,4 @@ /** | ||
* Take every nth value in the Series and return as new Series. | ||
* @param n - nth value to take | ||
* @param n - Gather every *n*-th row | ||
* @param offset - Start the row count at this offset | ||
* @example | ||
@@ -871,10 +886,17 @@ * ``` | ||
* shape: (2,) | ||
* Series: '' [i64] | ||
* Series: 'a' [i64] | ||
* [ | ||
* 1 | ||
* 3 | ||
* 1 | ||
* 3 | ||
* ] | ||
* s.gatherEvery(2, 1) | |
* shape: (2,) | ||
* Series: 'a' [i64] | ||
* [ | ||
* 2 | ||
* 4 | ||
* ] | ||
* ``` | ||
*/ | ||
gatherEvery(n: number): Series; | ||
gatherEvery(n: number, offset?: number): Series; | ||
/** | ||
@@ -919,2 +941,4 @@ * Take values by index. | ||
* __Count the unique values in a Series.__ | ||
* @param sort - Sort the output by count in descending order. | |
* If set to `false` (default), the order of the output is random. | |
* ___ | ||
@@ -939,3 +963,3 @@ * @example | ||
*/ | ||
valueCounts(): DataFrame; | ||
valueCounts(sort?: boolean): DataFrame; | ||
/** | ||
@@ -942,0 +966,0 @@ * Where mask evaluates true, take values from self. |
@@ -187,3 +187,3 @@ "use strict"; | ||
} | ||
else if (s.isUtf8()) { | ||
else if (s.isString()) { | ||
stats = { | ||
@@ -359,2 +359,5 @@ unique: s.nUnique(), | ||
}, | ||
isString() { | ||
return this.dtype.equals(datatypes_1.DataType.String); | ||
}, | ||
isUtf8() { | ||
@@ -513,2 +516,5 @@ return this.dtype.equals(datatypes_1.DataType.Utf8); | ||
setAtIdx(indices, value) { | ||
_s.scatter(indices, value); | ||
}, | ||
scatter(indices, value) { | ||
indices = exports.Series.isSeries(indices) | ||
@@ -526,3 +532,3 @@ ? indices.cast(datatypes_1.DataType.UInt32) | ||
} | ||
_s.setAtIdx(indices._s, value._s); | ||
_s.scatter(indices._s, value._s); | ||
}, | ||
@@ -580,7 +586,5 @@ set(mask, value) { | ||
}, | ||
sort(reverse) { | ||
if (typeof reverse === "boolean") { | ||
return wrap("sort", reverse); | ||
} | ||
return wrap("sort", reverse?.reverse ?? false); | ||
sort(options) { | ||
options = { descending: false, nullsLast: false, ...(options ?? {}) }; | ||
return wrap("sort", options.descending, options.nullsLast); | ||
}, | ||
@@ -599,4 +603,4 @@ sub(field) { | ||
}, | ||
gatherEvery(n) { | ||
return wrap("gatherEvery", n); | ||
gatherEvery(n, offset) { | ||
return wrap("gatherEvery", n, offset ?? 0); | ||
}, | ||
@@ -637,4 +641,4 @@ multiplyBy(field) { | ||
}, | ||
valueCounts() { | ||
return null; | ||
valueCounts(sorted) { | ||
return (0, dataframe_1._DataFrame)(unwrap("valueCounts", sorted ?? false)); | ||
}, | ||
@@ -657,3 +661,3 @@ values() { | ||
if (typeof prop !== "symbol" && !Number.isNaN(Number(prop))) { | ||
series.setAtIdx([Number(prop)], input); | ||
series.scatter([Number(prop)], input); | ||
return true; | ||
@@ -660,0 +664,0 @@ } |
@@ -47,4 +47,4 @@ "use strict"; | ||
}, | ||
join(separator = ",") { | ||
return wrap("join", separator); | ||
join(options) { | ||
return wrap("join", options); | ||
}, | ||
@@ -51,0 +51,0 @@ last() { |
@@ -0,1 +1,2 @@ | ||
import { Expr } from "./../lazy/expr/index"; | ||
import { DataType } from "../datatypes"; | ||
@@ -95,9 +96,10 @@ import { Series } from "."; | ||
*/ | ||
extract(pattern: string | RegExp, groupIndex: number): Series; | ||
extract(pattern: any, groupIndex: number): Series; | ||
/*** | ||
* Parse string values as JSON. | ||
* @returns Utf8 array. Contain null if original value is null or the `jsonPath` return nothing. | ||
* @deprecated @since 0.8.4 @use {@link jsonDecode} | ||
* @example | ||
* s = pl.Series("json", ['{"a":1, "b": true}', null, '{"a":2, "b": false}']); | ||
* s.str.json_extract().as("json"); | ||
* s.str.jsonExtract().as("json"); | ||
* shape: (3,) | ||
@@ -112,2 +114,17 @@ * Series: 'json' [struct[2]] | ||
jsonExtract(dtype?: DataType, inferSchemaLength?: number): Series; | ||
/*** | ||
* Parse string values as JSON. | ||
* @returns Struct Series decoded from the JSON strings; entries for null inputs hold null fields (see the example below). | |
* @example | ||
* s = pl.Series("json", ['{"a":1, "b": true}', null, '{"a":2, "b": false}']); | ||
* s.str.jsonDecode().as("json"); | ||
* shape: (3,) | ||
* Series: 'json' [struct[2]] | ||
* [ | ||
* {1,true} | ||
* {null,null} | ||
* {2,false} | ||
* ] | ||
*/ | ||
jsonDecode(dtype?: DataType, inferSchemaLength?: number): Series; | ||
/** | ||
@@ -211,3 +228,3 @@ * Extract the first match of json string with provided JSONPath expression. | ||
*/ | ||
zFill(length: number): Series; | ||
zFill(length: number | Expr): Series; | ||
/** Add trailing zeros */ | ||
@@ -240,3 +257,3 @@ padEnd(length: number, fillChar: string): Series; | ||
*/ | ||
slice(start: number, length?: number): Series; | ||
slice(start: number | Expr, length?: number | Expr): Series; | ||
/** | ||
@@ -243,0 +260,0 @@ * Split a string into substrings using the specified separator. |
@@ -49,7 +49,14 @@ "use strict"; | ||
extract(pat, groupIndex) { | ||
return wrap("strExtract", (0, utils_1.regexToString)(pat), groupIndex); | ||
const s = (0, _1._Series)(_s); | ||
return s | ||
.toFrame() | ||
.select((0, functions_1.col)(s.name).str.extract(pat, groupIndex).as(s.name)) | ||
.getColumn(s.name); | ||
}, | ||
jsonExtract(dtype, inferSchemaLength) { | ||
return wrap("strJsonExtract", dtype, inferSchemaLength); | ||
return wrap("strJsonDecode", dtype, inferSchemaLength); | ||
}, | ||
jsonDecode(dtype, inferSchemaLength) { | ||
return wrap("strJsonDecode", dtype, inferSchemaLength); | ||
}, | ||
jsonPathMatch(pat) { | ||
@@ -68,3 +75,6 @@ return wrap("strJsonPathMatch", pat); | ||
zFill(length) { | ||
return wrap("strZFill", length); | ||
return (0, _1._Series)(_s) | ||
.toFrame() | ||
.select((0, functions_1.col)(_s.name).str.zFill(length).as(_s.name)) | ||
.getColumn(_s.name); | ||
}, | ||
@@ -84,3 +94,7 @@ padEnd(length, fillChar) { | ||
slice(start, length) { | ||
return wrap("strSlice", start, length); | ||
const s = (0, _1._Series)(_s); | ||
return s | ||
.toFrame() | ||
.select((0, functions_1.col)(s.name).str.slice(start, length).as(s.name)) | ||
.getColumn(s.name); | ||
}, | ||
@@ -87,0 +101,0 @@ split(by, options) { |
@@ -614,5 +614,11 @@ /// <reference types="node" /> | ||
* If omitted, the list elements are separated with a comma. | ||
* @param ignoreNulls - If true, null values will be ignored. | ||
* @category List | ||
*/ | ||
join(separator?: string): T; | ||
join(): T; | ||
join(separator: string | Expr): T; | ||
join(options: { | ||
separator?: string | Expr; | ||
ignoreNulls?: boolean; | ||
}): T; | ||
/** | ||
@@ -619,0 +625,0 @@ * Get the last value of the sublists. |
@@ -29,2 +29,39 @@ /** | ||
/** | ||
* Options for @see {@link LazyDataFrame.sinkCSV} | ||
* @category Options | ||
*/ | ||
export interface SinkCsvOptions { | ||
includeHeader?: boolean; | ||
quote?: string; | ||
includeBom?: boolean; | ||
separator?: string; | ||
lineTerminator?: string; | ||
quoteChar?: string; | ||
batchSize?: number; | ||
datetimeFormat?: string; | ||
dateFormat?: string; | ||
timeFormat?: string; | ||
floatPrecision?: number; | ||
nullValue?: string; | ||
maintainOrder?: boolean; | ||
} | ||
/** | ||
* Options for @see {@link LazyDataFrame.sinkParquet} | ||
* @category Options | ||
*/ | ||
export interface SinkParquetOptions { | ||
compression?: string; | ||
compressionLevel?: number; | ||
statistics?: boolean; | ||
rowGroupSize?: number; | ||
dataPagesizeLimit?: number; | ||
maintainOrder?: boolean; | ||
typeCoercion?: boolean; | ||
predicatePushdown?: boolean; | ||
projectionPushdown?: boolean; | ||
simplifyExpression?: boolean; | ||
slicePushdown?: boolean; | ||
noOptimization?: boolean; | ||
} | ||
/** | ||
* Options for {@link DataFrame.writeJSON} | ||
@@ -31,0 +68,0 @@ * @category Options |
{ | ||
"name": "nodejs-polars", | ||
"version": "0.8.4", | ||
"version": "0.9.0", | ||
"repository": "https://github.com/pola-rs/nodejs-polars.git", | ||
@@ -57,13 +57,13 @@ "license": "MIT", | ||
"devDependencies": { | ||
"@biomejs/biome": "^1.5.0", | ||
"@napi-rs/cli": "^2.17.0", | ||
"@biomejs/biome": "^1.5.3", | ||
"@napi-rs/cli": "^2.18.0", | ||
"@types/chance": "^1.1.6", | ||
"@types/jest": "^29.5.11", | ||
"@types/node": "^20.10.6", | ||
"@types/jest": "^29.5.12", | ||
"@types/node": "^20.11.17", | ||
"chance": "^1.1.11", | ||
"jest": "^29.7.0", | ||
"source-map-support": "^0.5.21", | ||
"ts-jest": "^29.1.1", | ||
"ts-jest": "^29.1.2", | ||
"ts-node": "^10.9.2", | ||
"typedoc": "^0.25.6", | ||
"typedoc": "^0.25.8", | ||
"typescript": "5.3.3" | ||
@@ -76,11 +76,11 @@ }, | ||
"optionalDependencies": { | ||
"nodejs-polars-win32-x64-msvc": "0.8.4", | ||
"nodejs-polars-darwin-x64": "0.8.4", | ||
"nodejs-polars-linux-x64-gnu": "0.8.4", | ||
"nodejs-polars-darwin-arm64": "0.8.4", | ||
"nodejs-polars-linux-arm64-gnu": "0.8.4", | ||
"nodejs-polars-linux-arm64-musl": "0.8.4", | ||
"nodejs-polars-android-arm64": "0.8.4", | ||
"nodejs-polars-linux-x64-musl": "0.8.4" | ||
"nodejs-polars-win32-x64-msvc": "0.9.0", | ||
"nodejs-polars-darwin-x64": "0.9.0", | ||
"nodejs-polars-linux-x64-gnu": "0.9.0", | ||
"nodejs-polars-darwin-arm64": "0.9.0", | ||
"nodejs-polars-linux-arm64-gnu": "0.9.0", | ||
"nodejs-polars-linux-arm64-musl": "0.9.0", | ||
"nodejs-polars-android-arm64": "0.9.0", | ||
"nodejs-polars-linux-x64-musl": "0.9.0" | ||
} | ||
} |
Sorry, the diff of this file is too big to display
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
483892
12991