nodejs-polars
Advanced tools
Comparing version 0.7.2 to 0.7.3
@@ -0,1 +1,4 @@ | ||
/** | ||
* Configure polars; offers options for table formatting and more. | ||
*/ | ||
export interface Config { | ||
@@ -17,2 +20,5 @@ /** Use utf8 characters to print tables */ | ||
} | ||
/** | ||
* @ignore | ||
*/ | ||
export declare const Config: Config; |
@@ -8,5 +8,8 @@ "use strict"; | ||
const native_polars_1 = __importDefault(require("./native-polars")); | ||
/** | ||
* @ignore | ||
*/ | ||
exports.Config = { | ||
setUtf8Tables() { | ||
delete process.env["POLARS_FMT_NO_UTF8"]; | ||
process.env["POLARS_FMT_NO_UTF8"] = undefined; | ||
return this; | ||
@@ -37,3 +40,3 @@ }, | ||
return this; | ||
} | ||
}, | ||
}; |
/// <reference types="node" /> | ||
/// <reference types="node" /> | ||
import { GroupBy, RollingGroupBy } from "./groupby"; | ||
import { LazyDataFrame } from "./lazy/dataframe"; | ||
import { Expr } from "./lazy/expr"; | ||
import { Series } from "./series/series"; | ||
import { Series } from "./series"; | ||
import { Writable } from "stream"; | ||
import { DataType, JoinBaseOptions } from "./datatypes"; | ||
import { ColumnSelection, FillNullStrategy, ColumnsOrExpr, ValueOrArray, ExprOrString } from "./utils"; | ||
import { WriteCsvOptions, WriteIPCOptions, WriteParquetOptions, WriteAvroOptions, FillNullStrategy, JoinOptions } from "./types"; | ||
import { DataType } from "./datatypes"; | ||
import { ColumnSelection, ColumnsOrExpr, ValueOrArray, ExprOrString } from "./utils"; | ||
import { Arithmetic, Deserialize, GroupByOps, Sample, Serialize } from "./shared_traits"; | ||
declare const inspect: unique symbol; | ||
declare type WriteCsvOptions = { | ||
hasHeader?: boolean; | ||
sep?: string; | ||
}; | ||
declare type WriteParquetOptions = { | ||
compression?: "uncompressed" | "snappy" | "gzip" | "lzo" | "brotli" | "lz4" | "zstd"; | ||
}; | ||
declare type WriteIPCOptions = { | ||
compression?: "uncompressed" | "lz4" | "zstd"; | ||
}; | ||
declare type WriteAvroOptions = { | ||
compression?: "uncompressed" | "snappy" | "deflate"; | ||
}; | ||
/** | ||
* Write methods for DataFrame | ||
*/ | ||
interface WriteMethods { | ||
@@ -36,8 +28,8 @@ /** | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.writeCSV() | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.writeCSV() | ||
* foo,bar,ham | ||
@@ -49,3 +41,3 @@ * 1,6,a | ||
* // using a file path | ||
* >>> df.head(1).writeCSV("./foo.csv") | ||
* > df.head(1).writeCSV("./foo.csv") | ||
* // foo.csv | ||
@@ -56,11 +48,12 @@ * foo,bar,ham | ||
* // using a write stream | ||
* >>> const writeStream = new Stream.Writable({ | ||
* >>> write(chunk, encoding, callback) { | ||
* >>> console.log("writeStream: %O', chunk.toString()); | ||
* >>> callback(null); | ||
* >>> } | ||
* >>> }); | ||
* >>> df.head(1).writeCSV(writeStream, {hasHeader: false}) | ||
* > const writeStream = new Stream.Writable({ | ||
* ... write(chunk, encoding, callback) { | ||
* ... console.log("writeStream: %O', chunk.toString()); | ||
* ... callback(null); | ||
* ... } | ||
* ... }); | ||
* > df.head(1).writeCSV(writeStream, {hasHeader: false}) | ||
* writeStream: '1,6,a' | ||
* ``` | ||
* @category IO | ||
*/ | ||
@@ -77,12 +70,12 @@ writeCSV(): Buffer; | ||
* ``` | ||
* >>> const df = pl.DataFrame({ | ||
* >>> foo: [1,2,3], | ||
* >>> bar: ['a','b','c'] | ||
* >>> }) | ||
* > const df = pl.DataFrame({ | ||
* ... foo: [1,2,3], | ||
* ... bar: ['a','b','c'] | ||
* ... }) | ||
* | ||
* | ||
* >>> df.writeJSON({format:"json"}) | ||
* > df.writeJSON({format:"json"}) | ||
* `[ {"foo":1.0,"bar":"a"}, {"foo":2.0,"bar":"b"}, {"foo":3.0,"bar":"c"}]` | ||
* | ||
* >>> df.writeJSON({format:"lines"}) | ||
* > df.writeJSON({format:"lines"}) | ||
* `{"foo":1.0,"bar":"a"} | ||
@@ -93,4 +86,5 @@ * {"foo":2.0,"bar":"b"} | ||
* // writing to a file | ||
* >>> df.writeJSON("/path/to/file.json", {format:'lines'}) | ||
* > df.writeJSON("/path/to/file.json", {format:'lines'}) | ||
* ``` | ||
* @category IO | ||
*/ | ||
@@ -107,3 +101,4 @@ writeJSON(options?: { | ||
* @param options.compression Compression method *defaults to "uncompressed"* | ||
* */ | ||
* @category IO | ||
*/ | ||
writeIPC(options?: WriteIPCOptions): Buffer; | ||
@@ -115,3 +110,4 @@ writeIPC(destination: string | Writable, options?: WriteIPCOptions): void; | ||
* @param options.compression Compression method *defaults to "uncompressed"* | ||
* */ | ||
* @category IO | ||
*/ | ||
writeParquet(options?: WriteParquetOptions): Buffer; | ||
@@ -123,3 +119,3 @@ writeParquet(destination: string | Writable, options?: WriteParquetOptions): void; | ||
* @param options.compression Compression method *defaults to "uncompressed"* | ||
* | ||
* @category IO | ||
*/ | ||
@@ -130,78 +126,74 @@ writeAvro(options?: WriteAvroOptions): Buffer; | ||
/** | ||
* A DataFrame is a two-dimensional data structure that represents data as a table | ||
* with rows and columns. | ||
* | ||
A DataFrame is a two-dimensional data structure that represents data as a table | ||
with rows and columns. | ||
Parameters | ||
---------- | ||
@param data - Object, Array, or Series | ||
Two-dimensional data in various forms. object must contain Arrays. | ||
Array may contain Series or other Arrays. | ||
@param columns - Array of str, default undefined | ||
Column labels to use for resulting DataFrame. If specified, overrides any | ||
labels already present in the data. Must match data dimensions. | ||
@param orient - 'col' | 'row' default undefined | ||
Whether to interpret two-dimensional data as columns or as rows. If None, | ||
the orientation is inferred by matching the columns and data dimensions. If | ||
this does not yield conclusive results, column orientation is used. | ||
Examples | ||
-------- | ||
Constructing a DataFrame from an object : | ||
``` | ||
data = {'a': [1n, 2n], 'b': [3, 4]} | ||
df = pl.DataFrame(data) | ||
df | ||
shape: (2, 2) | ||
╭─────┬─────╮ | ||
│ a ┆ b │ | ||
│ --- ┆ --- │ | ||
│ u64 ┆ i64 │ | ||
╞═════╪═════╡ | ||
│ 1 ┆ 3 │ | ||
├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
│ 2 ┆ 4 │ | ||
╰─────┴─────╯ | ||
``` | ||
Notice that the dtype is automatically inferred as a polars Int64: | ||
``` | ||
df.dtypes | ||
['UInt64', `Int64'] | ||
``` | ||
In order to specify dtypes for your columns, initialize the DataFrame with a list | ||
of Series instead: | ||
``` | ||
data = [pl.Series('col1', [1, 2], pl.Float32), | ||
... pl.Series('col2', [3, 4], pl.Int64)] | ||
df2 = pl.DataFrame(series) | ||
df2 | ||
shape: (2, 2) | ||
╭──────┬──────╮ | ||
│ col1 ┆ col2 │ | ||
│ --- ┆ --- │ | ||
│ f32 ┆ i64 │ | ||
╞══════╪══════╡ | ||
│ 1 ┆ 3 │ | ||
├╌╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
│ 2 ┆ 4 │ | ||
╰──────┴──────╯ | ||
``` | ||
Constructing a DataFrame from a list of lists, row orientation inferred: | ||
``` | ||
data = [[1, 2, 3], [4, 5, 6]] | ||
df4 = pl.DataFrame(data, ['a', 'b', 'c']) | ||
df4 | ||
shape: (2, 3) | ||
╭─────┬─────┬─────╮ | ||
│ a ┆ b ┆ c │ | ||
│ --- ┆ --- ┆ --- │ | ||
│ i64 ┆ i64 ┆ i64 │ | ||
╞═════╪═════╪═════╡ | ||
│ 1 ┆ 2 ┆ 3 │ | ||
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ | ||
│ 4 ┆ 5 ┆ 6 │ | ||
╰─────┴─────┴─────╯ | ||
``` | ||
* @param data - Object, Array, or Series | ||
* Two-dimensional data in various forms. object must contain Arrays. | ||
* Array may contain Series or other Arrays. | ||
* @param columns - Array of str, default undefined | ||
* Column labels to use for resulting DataFrame. If specified, overrides any | ||
* labels already present in the data. Must match data dimensions. | ||
* @param orient - 'col' | 'row' default undefined | ||
* Whether to interpret two-dimensional data as columns or as rows. If None, | ||
* the orientation is inferred by matching the columns and data dimensions. If | ||
* this does not yield conclusive results, column orientation is used. | ||
* @example | ||
* Constructing a DataFrame from an object : | ||
* ``` | ||
* > data = {'a': [1n, 2n], 'b': [3, 4]} | ||
* > df = pl.DataFrame(data) | ||
* > df | ||
* shape: (2, 2) | ||
* ╭─────┬─────╮ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ u64 ┆ i64 │ | ||
* ╞═════╪═════╡ | ||
* │ 1 ┆ 3 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ 4 │ | ||
* ╰─────┴─────╯ | ||
* ``` | ||
* Notice that the dtype is automatically inferred as a polars Int64: | ||
* ``` | ||
* > df.dtypes | ||
* ['UInt64', `Int64'] | ||
* ``` | ||
* In order to specify dtypes for your columns, initialize the DataFrame with a list | ||
* of Series instead: | ||
* ``` | ||
* > data = [pl.Series('col1', [1, 2], pl.Float32), | ||
* ... pl.Series('col2', [3, 4], pl.Int64)] | ||
* > df2 = pl.DataFrame(series) | ||
* > df2 | ||
* shape: (2, 2) | ||
* ╭──────┬──────╮ | ||
* │ col1 ┆ col2 │ | ||
* │ --- ┆ --- │ | ||
* │ f32 ┆ i64 │ | ||
* ╞══════╪══════╡ | ||
* │ 1 ┆ 3 │ | ||
* ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 2 ┆ 4 │ | ||
* ╰──────┴──────╯ | ||
* ``` | ||
* | ||
* Constructing a DataFrame from a list of lists, row orientation inferred: | ||
* ``` | ||
* > data = [[1, 2, 3], [4, 5, 6]] | ||
* > df4 = pl.DataFrame(data, ['a', 'b', 'c']) | ||
* > df4 | ||
* shape: (2, 3) | ||
* ╭─────┬─────┬─────╮ | ||
* │ a ┆ b ┆ c │ | ||
* │ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ i64 ┆ i64 │ | ||
* ╞═════╪═════╪═════╡ | ||
* │ 1 ┆ 2 ┆ 3 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 4 ┆ 5 ┆ 6 │ | ||
* ╰─────┴─────┴─────╯ | ||
* ``` | ||
*/ | ||
export interface DataFrame extends Arithmetic<DataFrame>, Sample<DataFrame>, WriteMethods, Serialize, GroupByOps<RollingGroupBy> { | ||
export interface DataFrame extends Arithmetic<DataFrame>, Sample<DataFrame>, Arithmetic<DataFrame>, WriteMethods, Serialize, GroupByOps<RollingGroupBy> { | ||
/** @ignore */ | ||
@@ -231,8 +223,8 @@ _df: any; | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* > 'a': [1.0, 2.8, 3.0], | ||
* > 'b': [4, 5, 6], | ||
* > "c": [True, False, True] | ||
* > }) | ||
* > df.describe() | ||
* > df = pl.DataFrame({ | ||
* ... 'a': [1.0, 2.8, 3.0], | ||
* ... 'b': [4, 5, 6], | ||
* ... "c": [True, False, True] | ||
* ... }) | ||
* ... df.describe() | ||
* shape: (5, 4) | ||
@@ -265,9 +257,9 @@ * ╭──────────┬───────┬─────┬──────╮ | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* > "foo": [1, 2, 3], | ||
* > "bar": [6.0, 7.0, 8.0], | ||
* > "ham": ['a', 'b', 'c'], | ||
* > "apple": ['a', 'b', 'c'] | ||
* > }) | ||
* > df.drop(['ham', 'apple']) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6.0, 7.0, 8.0], | ||
* ... "ham": ['a', 'b', 'c'], | ||
* ... "apple": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.drop(['ham', 'apple']) | ||
* shape: (3, 2) | ||
@@ -285,5 +277,3 @@ * ╭─────┬─────╮ | ||
* ╰─────┴─────╯ | ||
* | ||
* ``` | ||
* | ||
*/ | ||
@@ -300,8 +290,8 @@ drop(name: string): DataFrame; | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* > "foo": [1, 2, 3], | ||
* > "bar": [6, null, 8], | ||
* > "ham": ['a', 'b', 'c'] | ||
* > }) | ||
* > df.dropNulls() | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, null, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.dropNulls() | ||
* shape: (2, 3) | ||
@@ -328,7 +318,7 @@ * ┌─────┬─────┬─────┐ | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* > "letters": ["c", "c", "a", "c", "a", "b"], | ||
* > "nrs": [[1, 2], [1, 3], [4, 3], [5, 5, 5], [6], [2, 1, 2]] | ||
* > }) | ||
* > console.log(df) | ||
* > df = pl.DataFrame({ | ||
* ... "letters": ["c", "c", "a", "c", "a", "b"], | ||
* ... "nrs": [[1, 2], [1, 3], [4, 3], [5, 5, 5], [6], [2, 1, 2]] | ||
* ... }) | ||
* > df | ||
* shape: (6, 2) | ||
@@ -352,3 +342,3 @@ * ╭─────────┬────────────╮ | ||
* ╰─────────┴────────────╯ | ||
* > df.explode("nrs") | ||
* > df.explode("nrs") | ||
* shape: (13, 2) | ||
@@ -427,9 +417,9 @@ * ╭─────────┬─────╮ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> // Filter on one condition | ||
* >>> df.filter(pl.col("foo").lt(3)) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* // Filter on one condition | ||
* > df.filter(pl.col("foo").lt(3)) | ||
* shape: (2, 3) | ||
@@ -445,7 +435,7 @@ * ┌─────┬─────┬─────┐ | ||
* └─────┴─────┴─────┘ | ||
* >>> // Filter on multiple conditions | ||
* >>> df.filter( | ||
* pl.col("foo").lt(3) | ||
* .and(pl.col("ham").eq("a")) | ||
* ) | ||
* // Filter on multiple conditions | ||
* > df.filter( | ||
* ... pl.col("foo").lt(3) | ||
* ... .and(pl.col("ham").eq("a")) | ||
* ... ) | ||
* shape: (1, 3) | ||
@@ -468,8 +458,8 @@ * ┌─────┬─────┬─────┐ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.findIdxByName("ham")) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.findIdxByName("ham")) | ||
* 2 | ||
@@ -494,9 +484,9 @@ * ``` | ||
* ``` | ||
* >>> // A horizontal sum operation | ||
* >>> df = pl.DataFrame({ | ||
* >>> "a": [2, 1, 3], | ||
* >>> "b": [1, 2, 3], | ||
* >>> "c": [1.0, 2.0, 3.0] | ||
* >>> }) | ||
* >>> df.fold((s1, s2) => s1.plus(s2)) | ||
* > // A horizontal sum operation | ||
* > df = pl.DataFrame({ | ||
* ... "a": [2, 1, 3], | ||
* ... "b": [1, 2, 3], | ||
* ... "c": [1.0, 2.0, 3.0] | ||
* ... }) | ||
* > df.fold((s1, s2) => s1.plus(s2)) | ||
* Series: 'a' [f64] | ||
@@ -508,9 +498,9 @@ * [ | ||
* ] | ||
* >>> // A horizontal minimum operation | ||
* >>> df = pl.DataFrame({ | ||
* >>> "a": [2, 1, 3], | ||
* >>> "b": [1, 2, 3], | ||
* >>> "c": [1.0, 2.0, 3.0] | ||
* >>> }) | ||
* >>> df.fold((s1, s2) => s1.zipWith(s1.lt(s2), s2)) | ||
* > // A horizontal minimum operation | ||
* > df = pl.DataFrame({ | ||
* ... "a": [2, 1, 3], | ||
* ... "b": [1, 2, 3], | ||
* ... "c": [1.0, 2.0, 3.0] | ||
* ... }) | ||
* > df.fold((s1, s2) => s1.zipWith(s1.lt(s2), s2)) | ||
* Series: 'a' [f64] | ||
@@ -522,9 +512,9 @@ * [ | ||
* ] | ||
* >>> // A horizontal string concatenation | ||
* >>> df = pl.DataFrame({ | ||
* >>> "a": ["foo", "bar", 2], | ||
* >>> "b": [1, 2, 3], | ||
* >>> "c": [1.0, 2.0, 3.0] | ||
* >>> }) | ||
* >>> df.fold((s1, s2) => s.plus(s2)) | ||
* > // A horizontal string concatenation | ||
* > df = pl.DataFrame({ | ||
* ... "a": ["foo", "bar", 2], | ||
* ... "b": [1, 2, 3], | ||
* ... "c": [1.0, 2.0, 3.0] | ||
* ... }) | ||
* > df.fold((s1, s2) => s.plus(s2)) | ||
* Series: '' [f64] | ||
@@ -547,15 +537,15 @@ * [ | ||
* ``` | ||
* >>> df1 = pl.DataFrame({ | ||
* >> "foo": [1, 2, 3], | ||
* >> "bar": [6.0, 7.0, 8.0], | ||
* >> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df2 = pl.DataFrame({ | ||
* >>> "foo": [3, 2, 1], | ||
* >>> "bar": [8.0, 7.0, 6.0], | ||
* >>> "ham": ['c', 'b', 'a'] | ||
* >>> }) | ||
* >>> df1.frameEqual(df1) | ||
* > df1 = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6.0, 7.0, 8.0], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df2 = pl.DataFrame({ | ||
* ... "foo": [3, 2, 1], | ||
* ... "bar": [8.0, 7.0, 6.0], | ||
* ... "ham": ['c', 'b', 'a'] | ||
* ... }) | ||
* > df1.frameEqual(df1) | ||
* true | ||
* >>> df1.frameEqual(df2) | ||
* > df1.frameEqual(df2) | ||
* false | ||
@@ -600,8 +590,8 @@ * ``` | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3, 4, 5], | ||
* >>> "bar": [6, 7, 8, 9, 10], | ||
* >>> "ham": ['a', 'b', 'c', 'd','e'] | ||
* >>> }) | ||
* >>> df.head(3) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3, 4, 5], | ||
* ... "bar": [6, 7, 8, 9, 10], | ||
* ... "ham": ['a', 'b', 'c', 'd','e'] | ||
* ... }) | ||
* > df.head(3) | ||
* shape: (3, 3) | ||
@@ -628,9 +618,9 @@ * ╭─────┬─────┬─────╮ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> x = pl.Series("apple", [10, 20, 30]) | ||
* >>> df.hStack([x]) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > x = pl.Series("apple", [10, 20, 30]) | ||
* > df.hStack([x]) | ||
* shape: (3, 4) | ||
@@ -686,12 +676,12 @@ * ╭─────┬─────┬─────┬───────╮ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6.0, 7.0, 8.0], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> otherDF = pl.DataFrame({ | ||
* >>> "apple": ['x', 'y', 'z'], | ||
* >>> "ham": ['a', 'b', 'd'] | ||
* >>> }) | ||
* >>> df.join(otherDF, {on: 'ham'}) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6.0, 7.0, 8.0], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > otherDF = pl.DataFrame({ | ||
* ... "apple": ['x', 'y', 'z'], | ||
* ... "ham": ['a', 'b', 'd'] | ||
* ... }) | ||
* > df.join(otherDF, {on: 'ham'}) | ||
* shape: (2, 4) | ||
@@ -711,7 +701,7 @@ * ╭─────┬─────┬─────┬───────╮ | ||
on: ValueOrArray<string>; | ||
} & JoinBaseOptions): DataFrame; | ||
} & Omit<JoinOptions, "leftOn" | "rightOn">): DataFrame; | ||
join(other: DataFrame, options: { | ||
leftOn: ValueOrArray<string>; | ||
rightOn: ValueOrArray<string>; | ||
} & JoinBaseOptions): DataFrame; | ||
} & Omit<JoinOptions, "on">): DataFrame; | ||
join(other: DataFrame, options: { | ||
@@ -722,90 +712,86 @@ how: "cross"; | ||
/** | ||
* Perform an asof join. This is similar to a left-join except that we | ||
* match on nearest key rather than equal keys. | ||
* | ||
* Both DataFrames must be sorted by the asof_join key. | ||
* | ||
For each row in the left DataFrame: | ||
- A "backward" search selects the last row in the right DataFrame whose | ||
'on' key is less than or equal to the left's key. | ||
- A "forward" search selects the first row in the right DataFrame whose | ||
'on' key is greater than or equal to the left's key. | ||
The default is "backward". | ||
Parameters | ||
---------- | ||
@param other DataFrame to join with. | ||
@param options.leftOn Join column of the left DataFrame. | ||
@param options.rightOn Join column of the right DataFrame. | ||
@param options.on Join column of both DataFrames. If set, `leftOn` and `rightOn` should be undefined. | ||
@param options.byLeft join on these columns before doing asof join | ||
@param options.byRight join on these columns before doing asof join | ||
@param options.strategy One of {'forward', 'backward'} | ||
@param options.suffix Suffix to append to columns with a duplicate name. | ||
@param options.tolerance | ||
Numeric tolerance. By setting this the join will only be done if the near keys are within this distance. | ||
If an asof join is done on columns of dtype "Date", "Datetime" you | ||
use the following string language: | ||
- 1ns *(1 nanosecond)* | ||
- 1us *(1 microsecond)* | ||
- 1ms *(1 millisecond)* | ||
- 1s *(1 second)* | ||
- 1m *(1 minute)* | ||
- 1h *(1 hour)* | ||
- 1d *(1 day)* | ||
- 1w *(1 week)* | ||
- 1mo *(1 calendar month)* | ||
- 1y *(1 calendar year)* | ||
- 1i *(1 index count)* | ||
Or combine them: | ||
- "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds | ||
@param options.allowParallel Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel. | ||
@param options.forceParallel Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel. | ||
@example | ||
``` | ||
>>> const gdp = pl.DataFrame({ | ||
... date: [ | ||
... new Date('2016-01-01'), | ||
... new Date('2017-01-01'), | ||
... new Date('2018-01-01'), | ||
... new Date('2019-01-01'), | ||
... ], // note record date: Jan 1st (sorted!) | ||
... gdp: [4164, 4411, 4566, 4696], | ||
... }) | ||
>>> const population = pl.DataFrame({ | ||
... date: [ | ||
... new Date('2016-05-12'), | ||
... new Date('2017-05-12'), | ||
... new Date('2018-05-12'), | ||
... new Date('2019-05-12'), | ||
... ], // note record date: May 12th (sorted!) | ||
... "population": [82.19, 82.66, 83.12, 83.52], | ||
... }) | ||
>>> population.joinAsof( | ||
... gdp, | ||
... {leftOn:"date", rightOn:"date", strategy:"backward"} | ||
... ) | ||
shape: (4, 3) | ||
┌─────────────────────┬────────────┬──────┐ | ||
│ date ┆ population ┆ gdp │ | ||
│ --- ┆ --- ┆ --- │ | ||
│ datetime[μs] ┆ f64 ┆ i64 │ | ||
╞═════════════════════╪════════════╪══════╡ | ||
│ 2016-05-12 00:00:00 ┆ 82.19 ┆ 4164 │ | ||
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
│ 2017-05-12 00:00:00 ┆ 82.66 ┆ 4411 │ | ||
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
│ 2018-05-12 00:00:00 ┆ 83.12 ┆ 4566 │ | ||
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
│ 2019-05-12 00:00:00 ┆ 83.52 ┆ 4696 │ | ||
└─────────────────────┴────────────┴──────┘ | ||
``` | ||
*/ | ||
* Perform an asof join. This is similar to a left-join except that we | ||
* match on nearest key rather than equal keys. | ||
* | ||
* Both DataFrames must be sorted by the asofJoin key. | ||
* | ||
* For each row in the left DataFrame: | ||
* - A "backward" search selects the last row in the right DataFrame whose | ||
* 'on' key is less than or equal to the left's key. | ||
* | ||
* - A "forward" search selects the first row in the right DataFrame whose | ||
* 'on' key is greater than or equal to the left's key. | ||
* | ||
* The default is "backward". | ||
* | ||
* @param other DataFrame to join with. | ||
* @param options.leftOn Join column of the left DataFrame. | ||
* @param options.rightOn Join column of the right DataFrame. | ||
* @param options.on Join column of both DataFrames. If set, `leftOn` and `rightOn` should be undefined. | ||
* @param options.byLeft join on these columns before doing asof join | ||
* @param options.byRight join on these columns before doing asof join | ||
* @param options.strategy One of 'forward', 'backward' | ||
* @param options.suffix Suffix to append to columns with a duplicate name. | ||
* @param options.tolerance | ||
* Numeric tolerance. By setting this the join will only be done if the near keys are within this distance. | ||
* If an asof join is done on columns of dtype "Date", "Datetime" you | ||
* use the following string language: | ||
* | ||
* - 1ns *(1 nanosecond)* | ||
* - 1us *(1 microsecond)* | ||
* - 1ms *(1 millisecond)* | ||
* - 1s *(1 second)* | ||
* - 1m *(1 minute)* | ||
* - 1h *(1 hour)* | ||
* - 1d *(1 day)* | ||
* - 1w *(1 week)* | ||
* - 1mo *(1 calendar month)* | ||
* - 1y *(1 calendar year)* | ||
* - 1i *(1 index count)* | ||
* | ||
* Or combine them: | ||
* - "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds | ||
* @param options.allowParallel Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel. | ||
* @param options.forceParallel Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel. | ||
* | ||
* @example | ||
* ``` | ||
* > const gdp = pl.DataFrame({ | ||
* ... date: [ | ||
* ... new Date('2016-01-01'), | ||
* ... new Date('2017-01-01'), | ||
* ... new Date('2018-01-01'), | ||
* ... new Date('2019-01-01'), | ||
* ... ], // note record date: Jan 1st (sorted!) | ||
* ... gdp: [4164, 4411, 4566, 4696], | ||
* ... }) | ||
* > const population = pl.DataFrame({ | ||
* ... date: [ | ||
* ... new Date('2016-05-12'), | ||
* ... new Date('2017-05-12'), | ||
* ... new Date('2018-05-12'), | ||
* ... new Date('2019-05-12'), | ||
* ... ], // note record date: May 12th (sorted!) | ||
* ... "population": [82.19, 82.66, 83.12, 83.52], | ||
* ... }) | ||
* > population.joinAsof( | ||
* ... gdp, | ||
* ... {leftOn:"date", rightOn:"date", strategy:"backward"} | ||
* ... ) | ||
* shape: (4, 3) | ||
* ┌─────────────────────┬────────────┬──────┐ | ||
* │ date ┆ population ┆ gdp │ | ||
* │ --- ┆ --- ┆ --- │ | ||
* │ datetime[μs] ┆ f64 ┆ i64 │ | ||
* ╞═════════════════════╪════════════╪══════╡ | ||
* │ 2016-05-12 00:00:00 ┆ 82.19 ┆ 4164 │ | ||
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 2017-05-12 00:00:00 ┆ 82.66 ┆ 4411 │ | ||
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 2018-05-12 00:00:00 ┆ 83.12 ┆ 4566 │ | ||
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 2019-05-12 00:00:00 ┆ 83.52 ┆ 4696 │ | ||
* └─────────────────────┴────────────┴──────┘ | ||
* ``` | ||
*/ | ||
joinAsof(other: DataFrame, options: { | ||
@@ -837,8 +823,8 @@ leftOn?: string; | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.max() | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.max() | ||
* shape: (1, 3) | ||
@@ -873,8 +859,8 @@ * ╭─────┬─────┬──────╮ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.median() | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.median() | ||
* shape: (1, 3) | ||
@@ -899,9 +885,9 @@ * ╭─────┬─────┬──────╮ | ||
* ``` | ||
* >>> df1 = pl.DataFrame({ | ||
* >>> 'id': [1], | ||
* >>> 'asset_key_1': ['123'], | ||
* >>> 'asset_key_2': ['456'], | ||
* >>> 'asset_key_3': ['abc'], | ||
* >>> }) | ||
* >>> df1.melt('id', ['asset_key_1', 'asset_key_2', 'asset_key_3']) | ||
* > df1 = pl.DataFrame({ | ||
* ... 'id': [1], | ||
* ... 'asset_key_1': ['123'], | ||
* ... 'asset_key_2': ['456'], | ||
* ... 'asset_key_3': ['abc'], | ||
* ... }) | ||
* > df1.melt('id', ['asset_key_1', 'asset_key_2', 'asset_key_3']) | ||
* shape: (3, 3) | ||
@@ -928,8 +914,8 @@ * ┌─────┬─────────────┬───────┐ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.min() | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.min() | ||
* shape: (1, 3) | ||
@@ -957,8 +943,8 @@ * ╭─────┬─────┬──────╮ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, null, 3], | ||
* >>> "bar": [6, 7, null], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.nullCount() | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, null, 3], | ||
* ... "bar": [6, 7, null], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.nullCount() | ||
* shape: (1, 3) | ||
@@ -979,45 +965,41 @@ * ┌─────┬─────┬─────┐ | ||
* | ||
Create a spreadsheet-style pivot table as a DataFrame. | ||
Parameters | ||
---------- | ||
@param values Column values to aggregate. Can be multiple columns if the *columns* arguments contains multiple columns as well | ||
@param options.index One or multiple keys to group by | ||
@param options.columns Columns whose values will be used as the header of the output DataFrame | ||
@param options.aggregateFunc | ||
Any of: | ||
- "sum" | ||
- "max" | ||
- "min" | ||
- "mean" | ||
- "median" | ||
- "first" | ||
- "last" | ||
- "count" | ||
Defaults to "first" | ||
@param options.maintainOrder Sort the grouped keys so that the output order is predictable. | ||
@param options.sortColumns Sort the transposed columns by name. Default is by order of discovery. | ||
@example | ||
``` | ||
>>> df = pl.DataFrame( | ||
... { | ||
... "foo": ["one", "one", "one", "two", "two", "two"], | ||
... "bar": ["A", "B", "C", "A", "B", "C"], | ||
... "baz": [1, 2, 3, 4, 5, 6], | ||
... } | ||
... ) | ||
>>> df.pivot({values:"baz", index:"foo", columns:"bar"}) | ||
shape: (2, 4) | ||
┌─────┬─────┬─────┬─────┐ | ||
│ foo ┆ A ┆ B ┆ C │ | ||
│ --- ┆ --- ┆ --- ┆ --- │ | ||
│ str ┆ i64 ┆ i64 ┆ i64 │ | ||
╞═════╪═════╪═════╪═════╡ | ||
│ one ┆ 1 ┆ 2 ┆ 3 │ | ||
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ | ||
│ two ┆ 4 ┆ 5 ┆ 6 │ | ||
└─────┴─────┴─────┴─────┘ | ||
``` | ||
* Create a spreadsheet-style pivot table as a DataFrame. | ||
* | ||
* @param values Column values to aggregate. Can be multiple columns if the *columns* arguments contains multiple columns as well | ||
* @param options.index One or multiple keys to group by | ||
* @param options.columns Columns whose values will be used as the header of the output DataFrame | ||
* @param options.aggregateFunc | ||
* Any of: | ||
* - "sum" | ||
* - "max" | ||
* - "min" | ||
* - "mean" | ||
* - "median" | ||
* - "first" | ||
* - "last" | ||
* - "count" | ||
* Defaults to "first" | ||
* @param options.maintainOrder Sort the grouped keys so that the output order is predictable. | ||
* @param options.sortColumns Sort the transposed columns by name. Default is by order of discovery. | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame( | ||
* ... { | ||
* ... "foo": ["one", "one", "one", "two", "two", "two"], | ||
* ... "bar": ["A", "B", "C", "A", "B", "C"], | ||
* ... "baz": [1, 2, 3, 4, 5, 6], | ||
* ... } | ||
* ... ) | ||
* > df.pivot({values:"baz", index:"foo", columns:"bar"}) | ||
* shape: (2, 4) | ||
* ┌─────┬─────┬─────┬─────┐ | ||
* │ foo ┆ A ┆ B ┆ C │ | ||
* │ --- ┆ --- ┆ --- ┆ --- │ | ||
* │ str ┆ i64 ┆ i64 ┆ i64 │ | ||
* ╞═════╪═════╪═════╪═════╡ | ||
* │ one ┆ 1 ┆ 2 ┆ 3 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ two ┆ 4 ┆ 5 ┆ 6 │ | ||
* └─────┴─────┴─────┴─────┘ | ||
* ``` | ||
*/ | ||
@@ -1043,8 +1025,8 @@ pivot(values: string | string[], options: { | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.quantile(0.5) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.quantile(0.5) | ||
* shape: (1, 3) | ||
@@ -1074,8 +1056,8 @@ * ╭─────┬─────┬──────╮ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.rename({"foo": "apple"}) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.rename({"foo": "apple"}) | ||
* ╭───────┬─────┬─────╮ | ||
@@ -1102,9 +1084,9 @@ * │ apple ┆ bar ┆ ham │ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> x = pl.Series("apple", [10, 20, 30]) | ||
* >>> df.replaceAtIdx(0, x) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > x = pl.Series("apple", [10, 20, 30]) | ||
* > df.replaceAtIdx(0, x) | ||
* shape: (3, 3) | ||
@@ -1130,8 +1112,8 @@ * ╭───────┬─────┬─────╮ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.row(2) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.row(2) | ||
* [3, 8, 'c'] | ||
@@ -1152,8 +1134,8 @@ * ``` | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.select('foo') | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.select('foo') | ||
* shape: (3, 1) | ||
@@ -1181,8 +1163,8 @@ * ┌─────┐ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.shift(1) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.shift(1) | ||
* shape: (3, 3) | ||
@@ -1200,3 +1182,3 @@ * ┌──────┬──────┬──────┐ | ||
* └──────┴──────┴──────┘ | ||
* >>> df.shift(-1) | ||
* > df.shift(-1) | ||
* shape: (3, 3) | ||
@@ -1229,8 +1211,8 @@ * ┌──────┬──────┬──────┐ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.shiftAndFill({periods:1, fill_value:0}) | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.shiftAndFill({periods:1, fill_value:0}) | ||
* shape: (3, 3) | ||
@@ -1271,8 +1253,8 @@ * ┌─────┬─────┬─────┐ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6.0, 7.0, 8.0], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.slice(1, 2) // Alternatively `df.slice({offset:1, length:2})` | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6.0, 7.0, 8.0], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.slice(1, 2) // Alternatively `df.slice({offset:1, length:2})` | ||
* shape: (2, 3) | ||
@@ -1311,8 +1293,8 @@ * ┌─────┬─────┬─────┐ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.std() | ||
* > df = pl.DataFrame({ | ||
* ... "foo": [1, 2, 3], | ||
* ... "bar": [6, 7, 8], | ||
* ... "ham": ['a', 'b', 'c'] | ||
* ... }) | ||
* > df.std() | ||
* shape: (1, 3) | ||
@@ -1344,7 +1326,7 @@ * ╭─────┬─────┬──────╮ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "letters": ["c", "c", "a", "c", "a", "b"], | ||
* >>> "nrs": [1, 2, 3, 4, 5, 6] | ||
* >>> }) | ||
* >>> df | ||
* > df = pl.DataFrame({ | ||
* ... "letters": ["c", "c", "a", "c", "a", "b"], | ||
* ... "nrs": [1, 2, 3, 4, 5, 6] | ||
* ... }) | ||
* > df | ||
* shape: (6, 2) | ||
@@ -1368,6 +1350,5 @@ * ╭─────────┬─────╮ | ||
* ╰─────────┴─────╯ | ||
* >>> df.groupby("letters") | ||
* >>> .tail(2) | ||
* >>> .sort("letters") | ||
* >>> | ||
* > df.groupby("letters") | ||
* ... .tail(2) | ||
* ... .sort("letters") | ||
* shape: (5, 2) | ||
@@ -1392,3 +1373,6 @@ * ╭─────────┬─────╮ | ||
tail(length?: number): DataFrame; | ||
/** @deprecated *since 0.4.0* use {@link writeCSV} */ | ||
/** | ||
* @deprecated *since 0.4.0* use {@link writeCSV} | ||
* @category Deprecated | ||
*/ | ||
toCSV(destOrOptions?: any, options?: any): any; | ||
@@ -1399,3 +1383,3 @@ /** | ||
* ``` | ||
* >>> df.toRecords() | ||
* > df.toRecords() | ||
* [ | ||
@@ -1407,5 +1391,9 @@ * {"foo":1.0,"bar":"a"}, | ||
* ``` | ||
* @category IO | ||
*/ | ||
toRecords(): Record<string, any>[]; | ||
/** compat with `JSON.stringify` */ | ||
/** | ||
* compat with `JSON.stringify` | ||
* @category IO | ||
*/ | ||
toJSON(): string; | ||
@@ -1416,3 +1404,3 @@ /** | ||
* ``` | ||
* >>> df.toObject() | ||
* > df.toObject() | ||
* { | ||
@@ -1423,7 +1411,14 @@ * "foo": [1,2,3], | ||
* ``` | ||
* @category IO | ||
*/ | ||
toObject(): Record<string, any[]>; | ||
/** @deprecated *since 0.4.0* use {@link writeIPC} */ | ||
/** | ||
* @deprecated *since 0.4.0* use {@link writeIPC} | ||
* @category IO Deprecated | ||
*/ | ||
toIPC(destination?: any, options?: any): any; | ||
/** @deprecated *since 0.4.0* use {@link writeParquet} */ | ||
/** | ||
* @deprecated *since 0.4.0* use {@link writeParquet} | ||
* @category IO Deprecated | ||
*/ | ||
toParquet(destination?: any, options?: any): any; | ||
@@ -1433,22 +1428,22 @@ toSeries(index?: number): Series; | ||
/** | ||
Convert a ``DataFrame`` to a ``Series`` of type ``Struct`` | ||
@param name Name for the struct Series | ||
@example | ||
``` | ||
>>> df = pl.DataFrame({ | ||
... "a": [1, 2, 3, 4, 5], | ||
... "b": ["one", "two", "three", "four", "five"], | ||
... }) | ||
>>> df.toStruct("nums") | ||
shape: (5,) | ||
Series: 'nums' [struct[2]{'a': i64, 'b': str}] | ||
[ | ||
{1,"one"} | ||
{2,"two"} | ||
{3,"three"} | ||
{4,"four"} | ||
{5,"five"} | ||
] | ||
``` | ||
*/ | ||
* Convert a ``DataFrame`` to a ``Series`` of type ``Struct`` | ||
* @param name Name for the struct Series | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... "a": [1, 2, 3, 4, 5], | ||
* ... "b": ["one", "two", "three", "four", "five"], | ||
* ... }) | ||
* > df.toStruct("nums") | ||
* shape: (5,) | ||
* Series: 'nums' [struct[2]{'a': i64, 'b': str}] | ||
* [ | ||
* {1,"one"} | ||
* {2,"two"} | ||
* {3,"three"} | ||
* {4,"four"} | ||
* {5,"five"} | ||
* ] | ||
* ``` | ||
*/ | ||
toStruct(name: string): Series; | ||
@@ -1458,3 +1453,3 @@ /** | ||
* | ||
* @note This is a very expensive operation. Perhaps you can do it differently. | ||
* @remarks This is a very expensive operation. Perhaps you can do it differently. | ||
* @param options | ||
@@ -1466,4 +1461,4 @@ * @param options.includeHeader If set, the column names will be added as first column. | ||
* @example | ||
* >>> df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) | ||
* >>> df.transpose({includeHeader:true}) | ||
* > df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) | ||
* > df.transpose({includeHeader:true}) | ||
* shape: (2, 4) | ||
@@ -1480,3 +1475,3 @@ * ┌────────┬──────────┬──────────┬──────────┐ | ||
* // replace the auto generated column names with a list | ||
* >>> df.transpose({includeHeader:false, columnNames:["a", "b", "c"]}) | ||
* > df.transpose({includeHeader:false, columnNames:["a", "b", "c"]}) | ||
* shape: (2, 3) | ||
@@ -1494,3 +1489,3 @@ * ┌─────┬─────┬─────┐ | ||
* // Include the header as a separate column | ||
* >>> df.transpose({ | ||
* > df.transpose({ | ||
* ... includeHeader:true, | ||
@@ -1512,3 +1507,3 @@ * ... headerName:"foo", | ||
* // Replace the auto generated column with column names from a generator function | ||
* >>> function *namesGenerator() { | ||
* > function *namesGenerator() { | ||
* ... const baseName = "my_column_"; | ||
@@ -1520,3 +1515,3 @@ * ... let count = 0; | ||
* ... } | ||
* >>> df.transpose({includeHeader:false, columnNames:namesGenerator}) | ||
* > df.transpose({includeHeader:false, columnNames:namesGenerator}) | ||
* shape: (2, 3) | ||
@@ -1557,3 +1552,3 @@ * ┌─────────────┬─────────────┬─────────────┐ | ||
``` | ||
>>> df = pl.DataFrame({ | ||
> df = pl.DataFrame({ | ||
... "int": [1, 2], | ||
@@ -1566,3 +1561,3 @@ ... "str": ["a", "b"], | ||
... .toFrame() | ||
>>> df | ||
> df | ||
shape: (2, 1) | ||
@@ -1578,3 +1573,3 @@ ┌─────────────────────────────┐ | ||
└─────────────────────────────┘ | ||
>>> df.unnest("my_struct") | ||
> df.unnest("my_struct") | ||
shape: (2, 4) | ||
@@ -1597,8 +1592,8 @@ ┌─────┬─────┬──────┬────────────┐ | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.var() | ||
* > df = pl.DataFrame({ | ||
* > "foo": [1, 2, 3], | ||
* > "bar": [6, 7, 8], | ||
* > "ham": ['a', 'b', 'c'] | ||
* > }) | ||
* > df.var() | ||
* shape: (1, 3) | ||
@@ -1620,13 +1615,13 @@ * ╭─────┬─────┬──────╮ | ||
* ``` | ||
* >>> df1 = pl.DataFrame({ | ||
* >>> "foo": [1, 2], | ||
* >>> "bar": [6, 7], | ||
* >>> "ham": ['a', 'b'] | ||
* >>> }) | ||
* >>> df2 = pl.DataFrame({ | ||
* >>> "foo": [3, 4], | ||
* >>> "bar": [8 , 9], | ||
* >>> "ham": ['c', 'd'] | ||
* >>> }) | ||
* >>> df1.vstack(df2) | ||
* > df1 = pl.DataFrame({ | ||
* ... "foo": [1, 2], | ||
* ... "bar": [6, 7], | ||
* ... "ham": ['a', 'b'] | ||
* ... }) | ||
* > df2 = pl.DataFrame({ | ||
* ... "foo": [3, 4], | ||
* ... "bar": [8 , 9], | ||
* ... "ham": ['c', 'd'] | ||
* ... }) | ||
* > df1.vstack(df2) | ||
* shape: (4, 3) | ||
@@ -1678,4 +1673,29 @@ * ╭─────┬─────┬─────╮ | ||
export declare const _DataFrame: (_df: any) => DataFrame; | ||
/** | ||
* DataFrame constructor | ||
*/ | ||
export interface DataFrameConstructor extends Deserialize<DataFrame> { | ||
/** | ||
* Create an empty DataFrame | ||
*/ | ||
(): DataFrame; | ||
/** | ||
* Create a DataFrame from a JavaScript object | ||
* @example | ||
* ``` | ||
* data = {'a': [1n, 2n], 'b': [3, 4]} | ||
* df = pl.DataFrame(data) | ||
* df | ||
* shape: (2, 2) | ||
* ╭─────┬─────╮ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ u64 ┆ i64 │ | ||
* ╞═════╪═════╡ | ||
* │ 1 ┆ 3 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ 4 │ | ||
* ╰─────┴─────╯ | ||
* ``` | ||
*/ | ||
(data: any, options?: { | ||
@@ -1682,0 +1702,0 @@ columns?: any[]; |
@@ -13,3 +13,3 @@ "use strict"; | ||
const expr_1 = require("./lazy/expr"); | ||
const series_1 = require("./series/series"); | ||
const series_1 = require("./series"); | ||
const stream_1 = require("stream"); | ||
@@ -155,4 +155,3 @@ const datatypes_1 = require("./datatypes"); | ||
filter(predicate) { | ||
return this.lazy().filter(predicate) | ||
.collectSync(); | ||
return this.lazy().filter(predicate).collectSync(); | ||
}, | ||
@@ -181,3 +180,3 @@ fillNull(strategy) { | ||
groupBy(...by) { | ||
return (0, groupby_1.GroupBy)(_df, (0, utils_1.columnOrColumnsStrict)(by)); | ||
return (0, groupby_1._GroupBy)(_df, (0, utils_1.columnOrColumnsStrict)(by)); | ||
}, | ||
@@ -291,12 +290,14 @@ groupByRolling(opts) { | ||
else { | ||
fn = { | ||
first: (0, functions_2.element)().first(), | ||
sum: (0, functions_2.element)().sum(), | ||
max: (0, functions_2.element)().max(), | ||
min: (0, functions_2.element)().min(), | ||
mean: (0, functions_2.element)().mean(), | ||
median: (0, functions_2.element)().median(), | ||
last: (0, functions_2.element)().last(), | ||
count: (0, functions_2.element)().count() | ||
}[aggregateFunc] ?? new Error(`Unknown aggregate function ${aggregateFunc}`); | ||
fn = | ||
{ | ||
first: (0, functions_2.element)().first(), | ||
sum: (0, functions_2.element)().sum(), | ||
max: (0, functions_2.element)().max(), | ||
min: (0, functions_2.element)().min(), | ||
mean: (0, functions_2.element)().mean(), | ||
median: (0, functions_2.element)().median(), | ||
last: (0, functions_2.element)().last(), | ||
count: (0, functions_2.element)().count(), | ||
}[aggregateFunc] ?? | ||
new Error(`Unknown aggregate function ${aggregateFunc}`); | ||
if (fn instanceof Error) { | ||
@@ -332,2 +333,3 @@ throw fn; | ||
sample(opts, frac, withReplacement = false, seed) { | ||
// rome-ignore lint/style/noArguments: <explanation> | ||
if (arguments.length === 0) { | ||
@@ -352,5 +354,3 @@ return wrap("sampleN", 1, withReplacement, false, seed); | ||
if (hasExpr) { | ||
return (0, exports._DataFrame)(_df).lazy() | ||
.select(selection) | ||
.collectSync(); | ||
return (0, exports._DataFrame)(_df).lazy().select(selection).collectSync(); | ||
} | ||
@@ -357,0 +357,0 @@ else { |
@@ -53,2 +53,5 @@ import { Field } from "./field"; | ||
static List(inner: DataType): DataType; | ||
/** | ||
* Struct type | ||
*/ | ||
static Struct(fields: Field[]): DataType; | ||
@@ -63,9 +66,6 @@ static Struct(fields: { | ||
[x: string]: { | ||
variant: string; | ||
inner: any; | ||
[x: string]: any; | ||
}; | ||
} | { | ||
[x: string]: { | ||
variant: string; | ||
}; | ||
[x: string]: string; | ||
}; | ||
@@ -104,2 +104,5 @@ static from(obj: any): DataType; | ||
} | ||
/** | ||
* Datetime type | ||
*/ | ||
declare class _Datetime extends DataType { | ||
@@ -127,2 +130,5 @@ private timeUnit; | ||
} | ||
/** | ||
* Datetime time unit | ||
*/ | ||
export declare enum TimeUnit { | ||
@@ -133,23 +139,48 @@ Nanoseconds = "ns", | ||
} | ||
/** | ||
* @ignore | ||
* Timeunit namespace | ||
*/ | ||
export declare namespace TimeUnit { | ||
function from(s: "ms" | "ns" | "us"): TimeUnit; | ||
} | ||
/** | ||
* Datatype namespace | ||
*/ | ||
export declare namespace DataType { | ||
/** Null */ | ||
type Null = _Null; | ||
/** Boolean */ | ||
type Bool = _Bool; | ||
/** Int8 */ | ||
type Int8 = _Int8; | ||
/** Int16 */ | ||
type Int16 = _Int16; | ||
/** Int32 */ | ||
type Int32 = _Int32; | ||
/** Int64 */ | ||
type Int64 = _Int64; | ||
/** UInt8 */ | ||
type UInt8 = _UInt8; | ||
/** UInt16 */ | ||
type UInt16 = _UInt16; | ||
/** UInt32 */ | ||
type UInt32 = _UInt32; | ||
/** UInt64 */ | ||
type UInt64 = _UInt64; | ||
/** Float32 */ | ||
type Float32 = _Float32; | ||
/** Float64 */ | ||
type Float64 = _Float64; | ||
/** Date dtype */ | ||
type Date = _Date; | ||
/** Datetime */ | ||
type Datetime = _Datetime; | ||
/** Utf8 */ | ||
type Utf8 = _Utf8; | ||
/** Categorical */ | ||
type Categorical = _Categorical; | ||
/** List */ | ||
type List = _List; | ||
/** Struct */ | ||
type Struct = _Struct; | ||
@@ -156,0 +187,0 @@ /** |
@@ -103,3 +103,8 @@ "use strict"; | ||
toString() { | ||
return `${this.identity}.${this.variant}`; | ||
if (this.inner) { | ||
return `${this.identity}(${this.variant}(${this.inner}))`; | ||
} | ||
else { | ||
return `${this.identity}(${this.variant})`; | ||
} | ||
} | ||
@@ -111,4 +116,3 @@ toJSON() { | ||
[this.identity]: { | ||
variant: this.variant, | ||
inner, | ||
[this.variant]: inner[0], | ||
}, | ||
@@ -119,5 +123,3 @@ }; | ||
return { | ||
[this.identity]: { | ||
variant: this.variant, | ||
}, | ||
[this.identity]: this.variant, | ||
}; | ||
@@ -168,2 +170,5 @@ } | ||
} | ||
/** | ||
* Datetime type | ||
*/ | ||
class _Datetime extends DataType { | ||
@@ -233,7 +238,11 @@ constructor(timeUnit, timeZone) { | ||
return { | ||
variant: this.variant, | ||
fields: this.fields.map(fld => fld.toJSON()) | ||
[this.identity]: { | ||
[this.variant]: this.fields, | ||
}, | ||
}; | ||
} | ||
} | ||
/** | ||
* Datetime time unit | ||
*/ | ||
var TimeUnit; | ||
@@ -245,2 +254,6 @@ (function (TimeUnit) { | ||
})(TimeUnit = exports.TimeUnit || (exports.TimeUnit = {})); | ||
/** | ||
* @ignore | ||
* Timeunit namespace | ||
*/ | ||
(function (TimeUnit) { | ||
@@ -252,2 +265,5 @@ function from(s) { | ||
})(TimeUnit = exports.TimeUnit || (exports.TimeUnit = {})); | ||
/** | ||
* Datatype namespace | ||
*/ | ||
(function (DataType) { | ||
@@ -264,3 +280,5 @@ /** | ||
if (variant === "Struct") { | ||
inner = [inner[0].map(fld => field_1.Field.from(fld.name, deserialize(fld.dtype)))]; | ||
inner = [ | ||
inner[0].map((fld) => field_1.Field.from(fld.name, deserialize(fld.dtype))), | ||
]; | ||
} | ||
@@ -267,0 +285,0 @@ if (variant === "List") { |
import { DataType } from "./datatype"; | ||
/** | ||
* A field is a name and a datatype. | ||
*/ | ||
export interface Field { | ||
@@ -6,3 +9,3 @@ name: string; | ||
} | ||
export declare class Field { | ||
export declare class Field implements Field { | ||
name: string; | ||
@@ -14,3 +17,3 @@ dtype: DataType; | ||
name: string; | ||
dtype: string; | ||
dtype: DataType; | ||
}; | ||
@@ -17,0 +20,0 @@ } |
@@ -10,3 +10,3 @@ "use strict"; | ||
toString() { | ||
return `Field("${this.name}": ${this.dtype})`; | ||
return `Field("${this.name}", ${this.dtype})`; | ||
} | ||
@@ -16,3 +16,3 @@ toJSON() { | ||
name: this.name, | ||
dtype: this.dtype.toString(), | ||
dtype: this.dtype, | ||
}; | ||
@@ -19,0 +19,0 @@ } |
@@ -1,39 +0,18 @@ | ||
import { DataType } from "./datatype"; | ||
export { DataType }; | ||
export declare type TypedArray = Int8Array | Int16Array | Int32Array | BigInt64Array | Uint8Array | Uint16Array | Uint32Array | BigInt64Array | Float32Array | Float64Array; | ||
export declare type Optional<T> = T | undefined | null; | ||
export declare enum _DataType { | ||
Int8 = 0, | ||
Int16 = 1, | ||
Int32 = 2, | ||
Int64 = 3, | ||
UInt8 = 4, | ||
UInt16 = 5, | ||
UInt32 = 6, | ||
UInt64 = 7, | ||
Float32 = 8, | ||
Float64 = 9, | ||
Bool = 10, | ||
Utf8 = 11, | ||
List = 12, | ||
Date = 13, | ||
Datetime = 14, | ||
Time = 15, | ||
Object = 16, | ||
Categorical = 17, | ||
Struct = 18 | ||
} | ||
export declare type JsDataFrame = any; | ||
export declare type NullValues = string | Array<string> | Record<string, string>; | ||
export declare type JoinBaseOptions = { | ||
how?: "left" | "inner" | "outer" | "semi" | "anti" | "cross"; | ||
suffix?: string; | ||
}; | ||
export declare type JoinOptions = { | ||
leftOn?: string | Array<string>; | ||
rightOn?: string | Array<string>; | ||
on?: string | Array<string>; | ||
how?: "left" | "inner" | "outer" | "semi" | "anti" | "cross"; | ||
suffix?: string; | ||
}; | ||
import { DataType, TimeUnit } from "./datatype"; | ||
export { DataType, TimeUnit }; | ||
export { Field } from "./field"; | ||
/** @ignore */ | ||
export type TypedArray = Int8Array | Int16Array | Int32Array | BigInt64Array | Uint8Array | Uint16Array | Uint32Array | BigInt64Array | Float32Array | Float64Array; | ||
/** | ||
* @ignore | ||
*/ | ||
export type Optional<T> = T | undefined | null; | ||
/** | ||
* @ignore | ||
*/ | ||
export type JsDataFrame = any; | ||
export type NullValues = string | Array<string> | Record<string, string>; | ||
/** | ||
* @ignore | ||
*/ | ||
export declare const DTYPE_TO_FFINAME: { | ||
@@ -60,2 +39,3 @@ Int8: string; | ||
}; | ||
/** @ignore */ | ||
export declare const polarsTypeToConstructor: (dtype: DataType) => CallableFunction; |
@@ -6,28 +6,12 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.polarsTypeToConstructor = exports.DTYPE_TO_FFINAME = exports._DataType = exports.DataType = void 0; | ||
exports.polarsTypeToConstructor = exports.DTYPE_TO_FFINAME = exports.Field = exports.TimeUnit = exports.DataType = void 0; | ||
const datatype_1 = require("./datatype"); | ||
Object.defineProperty(exports, "DataType", { enumerable: true, get: function () { return datatype_1.DataType; } }); | ||
Object.defineProperty(exports, "TimeUnit", { enumerable: true, get: function () { return datatype_1.TimeUnit; } }); | ||
var field_1 = require("./field"); | ||
Object.defineProperty(exports, "Field", { enumerable: true, get: function () { return field_1.Field; } }); | ||
const polars_internal_1 = __importDefault(require("../internals/polars_internal")); | ||
var _DataType; | ||
(function (_DataType) { | ||
_DataType[_DataType["Int8"] = 0] = "Int8"; | ||
_DataType[_DataType["Int16"] = 1] = "Int16"; | ||
_DataType[_DataType["Int32"] = 2] = "Int32"; | ||
_DataType[_DataType["Int64"] = 3] = "Int64"; | ||
_DataType[_DataType["UInt8"] = 4] = "UInt8"; | ||
_DataType[_DataType["UInt16"] = 5] = "UInt16"; | ||
_DataType[_DataType["UInt32"] = 6] = "UInt32"; | ||
_DataType[_DataType["UInt64"] = 7] = "UInt64"; | ||
_DataType[_DataType["Float32"] = 8] = "Float32"; | ||
_DataType[_DataType["Float64"] = 9] = "Float64"; | ||
_DataType[_DataType["Bool"] = 10] = "Bool"; | ||
_DataType[_DataType["Utf8"] = 11] = "Utf8"; | ||
_DataType[_DataType["List"] = 12] = "List"; | ||
_DataType[_DataType["Date"] = 13] = "Date"; | ||
_DataType[_DataType["Datetime"] = 14] = "Datetime"; | ||
_DataType[_DataType["Time"] = 15] = "Time"; | ||
_DataType[_DataType["Object"] = 16] = "Object"; | ||
_DataType[_DataType["Categorical"] = 17] = "Categorical"; | ||
_DataType[_DataType["Struct"] = 18] = "Struct"; | ||
})(_DataType = exports._DataType || (exports._DataType = {})); | ||
/** | ||
* @ignore | ||
*/ | ||
exports.DTYPE_TO_FFINAME = { | ||
@@ -104,9 +88,10 @@ Int8: "I8", | ||
}; | ||
/** @ignore */ | ||
const polarsTypeToConstructor = (dtype) => { | ||
const constructor = POLARS_TYPE_TO_CONSTRUCTOR[dtype.variant]; | ||
if (!constructor) { | ||
const ctor = POLARS_TYPE_TO_CONSTRUCTOR[dtype.variant]; | ||
if (!ctor) { | ||
throw new Error(`Cannot construct Series for type ${dtype.variant}.`); | ||
} | ||
return constructor; | ||
return ctor; | ||
}; | ||
exports.polarsTypeToConstructor = polarsTypeToConstructor; |
@@ -1,7 +0,4 @@ | ||
import { Series } from "./series/series"; | ||
import { Series } from "./series"; | ||
import { DataFrame } from "./dataframe"; | ||
declare type ConcatOptions = { | ||
rechunk?: boolean; | ||
how?: "vertical" | "horizontal"; | ||
}; | ||
import { ConcatOptions } from "./types"; | ||
/** | ||
@@ -16,4 +13,4 @@ * _Repeat a single value n times and collect into a Series._ | ||
* | ||
* > const s = pl.repeat("a", 5) | ||
* > s.toArray() | ||
* > const s = pl.repeat("a", 5) | ||
* > s.toArray() | ||
* ["a", "a", "a", "a", "a"] | ||
@@ -33,5 +30,5 @@ * | ||
* @example | ||
* >>> const df1 = pl.DataFrame({"a": [1], "b": [3]}) | ||
* >>> const df2 = pl.DataFrame({"a": [2], "b": [4]}) | ||
* >>> pl.concat([df1, df2]) | ||
* > const df1 = pl.DataFrame({"a": [1], "b": [3]}) | ||
* > const df2 = pl.DataFrame({"a": [2], "b": [4]}) | ||
* > pl.concat([df1, df2]) | ||
* shape: (2, 2) | ||
@@ -52,2 +49,1 @@ * ┌─────┬─────┐ | ||
}): Series; | ||
export {}; |
@@ -9,3 +9,3 @@ "use strict"; | ||
const construction_1 = require("./internals/construction"); | ||
const series_1 = require("./series/series"); | ||
const series_1 = require("./series"); | ||
const dataframe_1 = require("./dataframe"); | ||
@@ -23,4 +23,4 @@ const polars_internal_1 = __importDefault(require("./internals/polars_internal")); | ||
* | ||
* > const s = pl.repeat("a", 5) | ||
* > s.toArray() | ||
* > const s = pl.repeat("a", 5) | ||
* > s.toArray() | ||
* ["a", "a", "a", "a", "a"] | ||
@@ -27,0 +27,0 @@ * |
@@ -26,12 +26,12 @@ import { DataFrame } from "./dataframe"; | ||
* // use lazy api rest parameter style | ||
* >>> df.groupBy('foo', 'bar') | ||
* >>> .agg(pl.sum('ham'), col('spam').tail(4).sum()) | ||
* > df.groupBy('foo', 'bar') | ||
* > .agg(pl.sum('ham'), col('spam').tail(4).sum()) | ||
* | ||
* // use lazy api array style | ||
* >>> df.groupBy('foo', 'bar') | ||
* >>> .agg([pl.sum('ham'), col('spam').tail(4).sum()]) | ||
* > df.groupBy('foo', 'bar') | ||
* > .agg([pl.sum('ham'), col('spam').tail(4).sum()]) | ||
* | ||
* // use a mapping | ||
* >>> df.groupBy('foo', 'bar') | ||
* >>> .agg({'spam': ['sum', 'min']}) | ||
* > df.groupBy('foo', 'bar') | ||
* > .agg({'spam': ['sum', 'min']}) | ||
* | ||
@@ -61,7 +61,7 @@ * ``` | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "letters": ["c", "c", "a", "c", "a", "b"], | ||
* >>> "nrs": [1, 2, 3, 4, 5, 6] | ||
* >>> }) | ||
* >>> df | ||
* > df = pl.DataFrame({ | ||
* > "letters": ["c", "c", "a", "c", "a", "b"], | ||
* > "nrs": [1, 2, 3, 4, 5, 6] | ||
* > }) | ||
* > df | ||
* shape: (6, 2) | ||
@@ -85,6 +85,6 @@ * ╭─────────┬─────╮ | ||
* ╰─────────┴─────╯ | ||
* >>> df.groupby("letters") | ||
* >>> .head(2) | ||
* >>> .sort("letters"); | ||
* >>> | ||
* > df.groupby("letters") | ||
* > .head(2) | ||
* > .sort("letters"); | ||
* > >> | ||
* shape: (5, 2) | ||
@@ -139,3 +139,3 @@ * ╭─────────┬─────╮ | ||
*/ | ||
pivot({ pivotCol, valuesCol }: { | ||
pivot({ pivotCol, valuesCol, }: { | ||
pivotCol: string; | ||
@@ -156,14 +156,23 @@ valuesCol: string; | ||
} | ||
export declare type PivotOps = Pick<GroupBy, "count" | "first" | "max" | "mean" | "median" | "min" | "sum"> & { | ||
export type PivotOps = Pick<GroupBy, "count" | "first" | "max" | "mean" | "median" | "min" | "sum"> & { | ||
[inspect](): string; | ||
}; | ||
export declare function GroupBy(df: any, by: string[], maintainOrder?: boolean): GroupBy; | ||
/** @ignore */ | ||
export declare function _GroupBy(df: any, by: string[], maintainOrder?: boolean): GroupBy; | ||
/** | ||
* intermediate state of a rolling groupby | ||
*/ | ||
export interface RollingGroupBy { | ||
agg(column: ColumnsOrExpr, ...columns: ColumnsOrExpr[]): DataFrame; | ||
} | ||
/** @ignore */ | ||
export declare function RollingGroupBy(df: any, indexColumn: string, period: string, offset?: string, closed?: any, by?: ColumnsOrExpr): RollingGroupBy; | ||
/** | ||
* intermediate state of a dynamic groupby | ||
*/ | ||
export interface DynamicGroupBy { | ||
agg(column: ColumnsOrExpr, ...columns: ColumnsOrExpr[]): DataFrame; | ||
} | ||
/** @ignore */ | ||
export declare function DynamicGroupBy(df: any, indexColumn: string, every: string, period?: string, offset?: string, truncate?: boolean, includeBoundaries?: boolean, closed?: string, by?: ColumnsOrExpr): DynamicGroupBy; | ||
export {}; |
"use strict"; | ||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); | ||
var desc = Object.getOwnPropertyDescriptor(m, k); | ||
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { | ||
desc = { enumerable: true, get: function() { return m[k]; } }; | ||
} | ||
Object.defineProperty(o, k2, desc); | ||
}) : (function(o, m, k, k2) { | ||
@@ -25,3 +29,3 @@ if (k2 === undefined) k2 = k; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.DynamicGroupBy = exports.RollingGroupBy = exports.GroupBy = void 0; | ||
exports.DynamicGroupBy = exports.RollingGroupBy = exports._GroupBy = void 0; | ||
const dataframe_1 = require("./dataframe"); | ||
@@ -33,3 +37,4 @@ const utils = __importStar(require("./utils")); | ||
const inspectOpts = { colors: true, depth: null }; | ||
function GroupBy(df, by, maintainOrder = false) { | ||
/** @ignore */ | ||
function _GroupBy(df, by, maintainOrder = false) { | ||
const customInspect = () => util_1.default.formatWithOptions(inspectOpts, "GroupBy {by: %O}", by); | ||
@@ -57,5 +62,4 @@ const pivot = (opts, valuesCol) => { | ||
else { | ||
let pairs = Object.entries(aggs[0]) | ||
.flatMap(([key, values]) => { | ||
return [values].flat(2).map(v => (0, functions_1.col)(key)[v]()); | ||
let pairs = Object.entries(aggs[0]).flatMap(([key, values]) => { | ||
return [values].flat(2).map((v) => (0, functions_1.col)(key)[v]()); | ||
}); | ||
@@ -94,3 +98,3 @@ return (0, dataframe_1._DataFrame)(df) | ||
} | ||
exports.GroupBy = GroupBy; | ||
exports._GroupBy = _GroupBy; | ||
function PivotOps(df, by, pivotCol, valueCol) { | ||
@@ -110,2 +114,3 @@ const pivot = (agg) => () => (0, dataframe_1._DataFrame)(df.pivot([by].flat(), [pivotCol], [valueCol], agg)); | ||
} | ||
/** @ignore */ | ||
function RollingGroupBy(df, indexColumn, period, offset, closed, by) { | ||
@@ -119,6 +124,7 @@ return { | ||
.collectSync(); | ||
} | ||
}, | ||
}; | ||
} | ||
exports.RollingGroupBy = RollingGroupBy; | ||
/** @ignore */ | ||
function DynamicGroupBy(df, indexColumn, every, period, offset, truncate, includeBoundaries, closed, by) { | ||
@@ -129,8 +135,17 @@ return { | ||
.lazy() | ||
.groupByDynamic({ indexColumn, every, period, offset, truncate, includeBoundaries, closed, by }) | ||
.groupByDynamic({ | ||
indexColumn, | ||
every, | ||
period, | ||
offset, | ||
truncate, | ||
includeBoundaries, | ||
closed, | ||
by, | ||
}) | ||
.agg(column, ...columns) | ||
.collectSync({ noOptimizations: true }); | ||
} | ||
}, | ||
}; | ||
} | ||
exports.DynamicGroupBy = DynamicGroupBy; |
@@ -1,4 +0,4 @@ | ||
import * as series from "./series/series"; | ||
import * as series from "./series"; | ||
import * as df from "./dataframe"; | ||
import { DataType } from "./datatypes"; | ||
import { DataType, Field as _field } from "./datatypes"; | ||
import * as func from "./functions"; | ||
@@ -8,12 +8,23 @@ import * as io from "./io"; | ||
import * as ldf from "./lazy/dataframe"; | ||
import { funcs as lazy, Expr as lazyExpr, GroupBy as lazyGroupBy, when as _when } from "./lazy"; | ||
declare namespace pl { | ||
export import Expr = lazyExpr.Expr; | ||
export { DataType, Field, TimeUnit } from "./datatypes"; | ||
export * from "./series"; | ||
export { Expr } from "./lazy/expr"; | ||
export * from "./dataframe"; | ||
export * from "./functions"; | ||
export * from "./io"; | ||
export * from "./cfg"; | ||
export * from "./lazy/dataframe"; | ||
export * from "./lazy"; | ||
import * as lazy from "./lazy"; | ||
export * from "./types"; | ||
export type { GroupBy } from "./groupby"; | ||
export declare namespace pl { | ||
export import Expr = lazy.Expr; | ||
export import DataFrame = df.DataFrame; | ||
export import LazyDataFrame = ldf.LazyDataFrame; | ||
export import Series = series.Series; | ||
type LazyGroupBy = lazyGroupBy; | ||
type When = _when.When; | ||
type WhenThen = _when.WhenThen; | ||
type WhenThenThen = _when.WhenThenThen; | ||
type LazyGroupBy = lazy.LazyGroupBy; | ||
type When = lazy.When; | ||
type WhenThen = lazy.WhenThen; | ||
type WhenThenThen = lazy.WhenThenThen; | ||
export import Config = cfg.Config; | ||
@@ -37,3 +48,6 @@ export import Int8 = DataType.Int8; | ||
export import Object = DataType.Object; | ||
export import Null = DataType.Null; | ||
export import Struct = DataType.Struct; | ||
export import Categorical = DataType.Categorical; | ||
export import Field = _field; | ||
export import repeat = func.repeat; | ||
@@ -64,2 +78,3 @@ export import concat = func.concat; | ||
export import exclude = lazy.exclude; | ||
export import element = lazy.element; | ||
export import first = lazy.first; | ||
@@ -80,5 +95,4 @@ export import format = lazy.format; | ||
export import list = lazy.list; | ||
export import when = _when.when; | ||
const version: any; | ||
} | ||
export = pl; | ||
export default pl; |
"use strict"; | ||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); | ||
var desc = Object.getOwnPropertyDescriptor(m, k); | ||
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { | ||
desc = { enumerable: true, get: function() { return m[k]; } }; | ||
} | ||
Object.defineProperty(o, k2, desc); | ||
}) : (function(o, m, k, k2) { | ||
@@ -21,6 +25,11 @@ if (k2 === undefined) k2 = k; | ||
}; | ||
var __exportStar = (this && this.__exportStar) || function(m, exports) { | ||
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); | ||
}; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
const series = __importStar(require("./series/series")); | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.pl = exports.Expr = exports.TimeUnit = exports.Field = exports.DataType = void 0; | ||
const series = __importStar(require("./series")); | ||
const df = __importStar(require("./dataframe")); | ||
@@ -33,6 +42,20 @@ const datatypes_1 = require("./datatypes"); | ||
const polars_internal_1 = __importDefault(require("./internals/polars_internal")); | ||
const lazy_1 = require("./lazy"); | ||
var datatypes_2 = require("./datatypes"); | ||
Object.defineProperty(exports, "DataType", { enumerable: true, get: function () { return datatypes_2.DataType; } }); | ||
Object.defineProperty(exports, "Field", { enumerable: true, get: function () { return datatypes_2.Field; } }); | ||
Object.defineProperty(exports, "TimeUnit", { enumerable: true, get: function () { return datatypes_2.TimeUnit; } }); | ||
__exportStar(require("./series"), exports); | ||
var expr_1 = require("./lazy/expr"); | ||
Object.defineProperty(exports, "Expr", { enumerable: true, get: function () { return expr_1.Expr; } }); | ||
__exportStar(require("./dataframe"), exports); | ||
__exportStar(require("./functions"), exports); | ||
__exportStar(require("./io"), exports); | ||
__exportStar(require("./cfg"), exports); | ||
__exportStar(require("./lazy/dataframe"), exports); | ||
__exportStar(require("./lazy"), exports); | ||
const lazy = __importStar(require("./lazy")); | ||
__exportStar(require("./types"), exports); | ||
var pl; | ||
(function (pl) { | ||
pl.Expr = lazy_1.Expr.Expr; | ||
pl.Expr = lazy.Expr; | ||
pl.DataFrame = df.DataFrame; | ||
@@ -55,7 +78,12 @@ pl.LazyDataFrame = ldf.LazyDataFrame; | ||
pl.List = datatypes_1.DataType.List; | ||
// rome-ignore lint/suspicious/noShadowRestrictedNames: pl.Date | ||
pl.Date = datatypes_1.DataType.Date; | ||
pl.Datetime = datatypes_1.DataType.Datetime; | ||
pl.Time = datatypes_1.DataType.Time; | ||
// rome-ignore lint/suspicious/noShadowRestrictedNames: pl.Object | ||
pl.Object = datatypes_1.DataType.Object; | ||
pl.Null = datatypes_1.DataType.Null; | ||
pl.Struct = datatypes_1.DataType.Struct; | ||
pl.Categorical = datatypes_1.DataType.Categorical; | ||
pl.Field = datatypes_1.Field; | ||
pl.repeat = func.repeat; | ||
@@ -77,34 +105,32 @@ pl.concat = func.concat; | ||
// lazy | ||
pl.col = lazy_1.funcs.col; | ||
pl.cols = lazy_1.funcs.cols; | ||
pl.lit = lazy_1.funcs.lit; | ||
pl.arange = lazy_1.funcs.arange; | ||
pl.argSortBy = lazy_1.funcs.argSortBy; | ||
pl.avg = lazy_1.funcs.avg; | ||
pl.concatList = lazy_1.funcs.concatList; | ||
pl.concatString = lazy_1.funcs.concatString; | ||
pl.count = lazy_1.funcs.count; | ||
pl.cov = lazy_1.funcs.cov; | ||
pl.exclude = lazy_1.funcs.exclude; | ||
pl.first = lazy_1.funcs.first; | ||
pl.format = lazy_1.funcs.format; | ||
pl.groups = lazy_1.funcs.groups; | ||
pl.head = lazy_1.funcs.head; | ||
pl.last = lazy_1.funcs.last; | ||
pl.mean = lazy_1.funcs.mean; | ||
pl.median = lazy_1.funcs.median; | ||
pl.nUnique = lazy_1.funcs.nUnique; | ||
pl.pearsonCorr = lazy_1.funcs.pearsonCorr; | ||
pl.quantile = lazy_1.funcs.quantile; | ||
pl.select = lazy_1.funcs.select; | ||
pl.struct = lazy_1.funcs.struct; | ||
pl.spearmanRankCorr = lazy_1.funcs.spearmanRankCorr; | ||
pl.tail = lazy_1.funcs.tail; | ||
pl.list = lazy_1.funcs.list; | ||
pl.when = lazy_1.when.when; | ||
pl.col = lazy.col; | ||
pl.cols = lazy.cols; | ||
pl.lit = lazy.lit; | ||
pl.arange = lazy.arange; | ||
pl.argSortBy = lazy.argSortBy; | ||
pl.avg = lazy.avg; | ||
pl.concatList = lazy.concatList; | ||
pl.concatString = lazy.concatString; | ||
pl.count = lazy.count; | ||
pl.cov = lazy.cov; | ||
pl.exclude = lazy.exclude; | ||
pl.element = lazy.element; | ||
pl.first = lazy.first; | ||
pl.format = lazy.format; | ||
pl.groups = lazy.groups; | ||
pl.head = lazy.head; | ||
pl.last = lazy.last; | ||
pl.mean = lazy.mean; | ||
pl.median = lazy.median; | ||
pl.nUnique = lazy.nUnique; | ||
pl.pearsonCorr = lazy.pearsonCorr; | ||
pl.quantile = lazy.quantile; | ||
pl.select = lazy.select; | ||
pl.struct = lazy.struct; | ||
pl.spearmanRankCorr = lazy.spearmanRankCorr; | ||
pl.tail = lazy.tail; | ||
pl.list = lazy.list; | ||
pl.version = polars_internal_1.default.version(); | ||
})(pl || (pl = {})); | ||
// add this globally so packages can reuse it. | ||
})(pl = exports.pl || (exports.pl = {})); | ||
// eslint-disable-next-line no-undef | ||
global[Symbol.for("__pl__")] = pl; | ||
module.exports = pl; | ||
exports.default = pl; |
@@ -10,3 +10,3 @@ "use strict"; | ||
const types_1 = require("util/types"); | ||
const series_1 = require("../series/series"); | ||
const series_1 = require("../series"); | ||
const datatype_1 = require("../datatypes/datatype"); | ||
@@ -79,7 +79,7 @@ const field_1 = require("../datatypes/field"); | ||
* ``` | ||
* >>> const input = [null, [], [null, "a", "b"]] | ||
* >>> firstNonNull(input) | ||
* > const input = [null, [], [null, "a", "b"]] | ||
* > firstNonNull(input) | ||
* ["a"] | ||
* >>> const ints = [null, 1] | ||
* >>> firstNonNull(ints) | ||
* > const ints = [null, 1] | ||
* > firstNonNull(ints) | ||
* 1 | ||
@@ -89,3 +89,3 @@ * ``` | ||
const firstNonNull = (arr) => { | ||
const first = arr.find(x => x !== null && x !== undefined); | ||
const first = arr.find((x) => x !== null && x !== undefined); | ||
if (Array.isArray(first)) { | ||
@@ -132,3 +132,3 @@ return [firstNonNull(arr.flat())]; | ||
//Empty sequence defaults to Float64 type | ||
if (!values?.length && !dtype) { | ||
if (!(values?.length || dtype)) { | ||
dtype = datatypes_1.DataType.Float64; | ||
@@ -139,4 +139,4 @@ } | ||
const listDtype = (0, exports.jsTypeToPolarsType)(firstValue); | ||
const constructor = (0, datatypes_1.polarsTypeToConstructor)(datatypes_1.DataType.List(listDtype)); | ||
return constructor(name, values, strict, listDtype); | ||
const ctor = (0, datatypes_1.polarsTypeToConstructor)(datatypes_1.DataType.List(listDtype)); | ||
return ctor(name, values, strict, listDtype); | ||
} | ||
@@ -153,4 +153,4 @@ dtype = dtype ?? (0, exports.jsTypeToPolarsType)(firstValue); | ||
else { | ||
const constructor = (0, datatypes_1.polarsTypeToConstructor)(dtype); | ||
series = constructor(name, values, strict); | ||
const ctor = (0, datatypes_1.polarsTypeToConstructor)(dtype); | ||
series = ctor(name, values, strict); | ||
} | ||
@@ -221,3 +221,3 @@ if ([ | ||
if (!data) { | ||
return columns.map(c => series_1.Series.from(c, [])._s); | ||
return columns.map((c) => series_1.Series.from(c, [])._s); | ||
} | ||
@@ -224,0 +224,0 @@ else if (data.length === columns.length) { |
605
bin/io.d.ts
/// <reference types="node" /> | ||
/// <reference types="node" /> | ||
import { DataType } from "./datatypes"; | ||
@@ -6,2 +7,32 @@ import { DataFrame } from "./dataframe"; | ||
import { Readable } from "stream"; | ||
export interface ReadCsvOptions { | ||
inferSchemaLength: number | null; | ||
nRows: number; | ||
batchSize: number; | ||
hasHeader: boolean; | ||
ignoreErrors: boolean; | ||
endRows: number; | ||
startRows: number; | ||
projection: number; | ||
sep: string; | ||
columns: string[]; | ||
rechunk: boolean; | ||
encoding: "utf8" | "utf8-lossy"; | ||
numThreads: number; | ||
dtype: any; | ||
lowMemory: boolean; | ||
commentChar: string; | ||
quotChar: string; | ||
nullValues: string | Array<string> | Record<string, string>; | ||
chunkSize: number; | ||
skipRows: number; | ||
parseDates: boolean; | ||
skipRowsAfterHeader: number; | ||
rowCount: any; | ||
} | ||
export interface ReadJsonOptions { | ||
batchSize: number; | ||
inferSchemaLength: number | null; | ||
format: "lines" | "json"; | ||
} | ||
export declare function readRecords(records: Record<string, any>[], options?: { | ||
@@ -14,147 +45,167 @@ schema: Record<string, DataType>; | ||
/** | ||
* __Read a CSV file or string into a Dataframe.__ | ||
* ___ | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.csv`. | ||
* - body: String or buffer to be read as a CSV | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to False first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.endRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.startRows -Start reading after `startRows` position. | ||
* @param options.projection -Indices of columns to select. Note that column indices start at zero. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.columns -Columns to select. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.encoding -Allowed encodings: `utf8`, `utf8-lossy`. Lossy means that invalid utf8 values are replaced with `�` character. | ||
* @param options.numThreads -Number of threads to use in csv parsing. Defaults to the number of physical cpu's of your system. | ||
* @param options.dtype -Overwrite the dtypes during inference. | ||
* @param options.lowMemory - Reduce memory usage in expense of performance. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quotChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps column name to a null value string.Ex. {"column_1": 0} | ||
* @param options.parseDates -Whether to attempt to parse dates or not | ||
* @returns DataFrame | ||
*/ | ||
export declare function readCSV(pathOrBody: string | Buffer, options?: any): DataFrame; | ||
* __Read a CSV file or string into a Dataframe.__ | ||
* ___ | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.csv`. | ||
* - body: String or buffer to be read as a CSV | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.nRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to False first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.endRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.startRows -Start reading after `startRows` position. | ||
* @param options.projection -Indices of columns to select. Note that column indices start at zero. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.columns -Columns to select. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.encoding -Allowed encodings: `utf8`, `utf8-lossy`. Lossy means that invalid utf8 values are replaced with `�` character. | ||
* @param options.numThreads -Number of threads to use in csv parsing. Defaults to the number of physical cpu's of your system. | ||
* @param options.dtype -Overwrite the dtypes during inference. | ||
* @param options.lowMemory - Reduce memory usage in expense of performance. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quotChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps column name to a null value string.Ex. {"column_1": 0} | ||
* @param options.parseDates -Whether to attempt to parse dates or not | ||
* @returns DataFrame | ||
*/ | ||
export declare function readCSV(pathOrBody: string | Buffer, options?: Partial<ReadCsvOptions>): DataFrame; | ||
export interface ScanCsvOptions { | ||
hasHeader: boolean; | ||
sep: string; | ||
commentChar: string; | ||
quoteChar: string; | ||
skipRows: number; | ||
nullValues: string | Array<string> | Record<string, string>; | ||
ignoreErrors: boolean; | ||
cache: boolean; | ||
inferSchemaLength: number | null; | ||
rechunk: boolean; | ||
nRows: number; | ||
encoding: string; | ||
lowMemory: boolean; | ||
parseDates: boolean; | ||
skipRowsAfterHeader: number; | ||
} | ||
/** | ||
* __Lazily read from a CSV file or multiple files via glob patterns.__ | ||
* | ||
* This allows the query optimizer to push down predicates and | ||
* projections to the scan level, thereby potentially reducing | ||
* memory overhead. | ||
* ___ | ||
* @param path path to a file | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to False first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quoteChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.skipRows -Start reading after `skipRows` position. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps column name to a null value string.Ex. {"column_1": 0} | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.n_rows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.lowMemory - Reduce memory usage in expense of performance. | ||
* ___ | ||
* | ||
*/ | ||
export declare function scanCSV(path: string, options?: any): LazyDataFrame; | ||
* __Lazily read from a CSV file or multiple files via glob patterns.__ | ||
* | ||
* This allows the query optimizer to push down predicates and | ||
* projections to the scan level, thereby potentially reducing | ||
* memory overhead. | ||
* ___ | ||
* @param path path to a file | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to False first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quoteChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.skipRows -Start reading after `skipRows` position. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps column name to a null value string.Ex. {"column_1": 0} | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.nRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.lowMemory - Reduce memory usage in expense of performance. | ||
* ___ | ||
* | ||
*/ | ||
export declare function scanCSV(path: string, options?: Partial<ScanCsvOptions>): LazyDataFrame; | ||
/** | ||
* __Read a JSON file or string into a DataFrame.__ | ||
* | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.csv`. | ||
* - body: String or buffer to be read as a CSV | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.jsonFormat - Either "lines" or "json" | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @returns ({@link DataFrame}) | ||
* @example | ||
* ``` | ||
* const jsonString = ` | ||
* {"a", 1, "b", "foo", "c": 3} | ||
* {"a": 2, "b": "bar", "c": 6} | ||
* ` | ||
* > const df = pl.readJSON(jsonString) | ||
* > console.log(df) | ||
* shape: (2, 3) | ||
* ╭─────┬─────┬─────╮ | ||
* │ a ┆ b ┆ c │ | ||
* │ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ str ┆ i64 │ | ||
* ╞═════╪═════╪═════╡ | ||
* │ 1 ┆ foo ┆ 3 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ bar ┆ 6 │ | ||
* ╰─────┴─────┴─────╯ | ||
* ``` | ||
*/ | ||
export declare function readJSON(pathOrBody: string | Buffer, options?: any): DataFrame; | ||
interface JsonScanOptions { | ||
inferSchemaLength?: number; | ||
nThreads?: number; | ||
batchSize?: number; | ||
lowMemory?: boolean; | ||
numRows?: number; | ||
skipRows?: number; | ||
rowCount?: RowCount; | ||
* __Read a JSON file or string into a DataFrame.__ | ||
* | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.csv`. | ||
* - body: String or buffer to be read as a CSV | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.jsonFormat - Either "lines" or "json" | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @returns ({@link DataFrame}) | ||
* @example | ||
* ``` | ||
* const jsonString = ` | ||
* {"a", 1, "b", "foo", "c": 3} | ||
* {"a": 2, "b": "bar", "c": 6} | ||
* ` | ||
* > const df = pl.readJSON(jsonString) | ||
* > console.log(df) | ||
* shape: (2, 3) | ||
* ╭─────┬─────┬─────╮ | ||
* │ a ┆ b ┆ c │ | ||
* │ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ str ┆ i64 │ | ||
* ╞═════╪═════╪═════╡ | ||
* │ 1 ┆ foo ┆ 3 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ bar ┆ 6 │ | ||
* ╰─────┴─────┴─────╯ | ||
* ``` | ||
*/ | ||
export declare function readJSON(pathOrBody: string | Buffer, options?: Partial<ReadJsonOptions>): DataFrame; | ||
interface ScanJsonOptions { | ||
inferSchemaLength: number | null; | ||
nThreads: number; | ||
batchSize: number; | ||
lowMemory: boolean; | ||
numRows: number; | ||
skipRows: number; | ||
rowCount: RowCount; | ||
} | ||
/** | ||
* __Read a JSON file or string into a DataFrame.__ | ||
* | ||
* _Note: Currently only newline delimited JSON is supported_ | ||
* @param path - path to json file | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `./file.json`. | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.nThreads - Maximum number of threads to use when reading json. | ||
* @param options.lowMemory - Reduce memory usage in expense of performance. | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.numRows Stop reading from parquet file after reading ``numRows``. | ||
* @param options.skipRows -Start reading after ``skipRows`` position. | ||
* @param options.rowCount Add row count as column | ||
* @returns ({@link DataFrame}) | ||
* @example | ||
* ``` | ||
* > const df = pl.scanJson('path/to/file.json', {numRows: 2}).collectSync() | ||
* > console.log(df) | ||
* shape: (2, 3) | ||
* ╭─────┬─────┬─────╮ | ||
* │ a ┆ b ┆ c │ | ||
* │ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ str ┆ i64 │ | ||
* ╞═════╪═════╪═════╡ | ||
* │ 1 ┆ foo ┆ 3 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ bar ┆ 6 │ | ||
* ╰─────┴─────┴─────╯ | ||
* ``` | ||
*/ | ||
export declare function scanJson(path: string, options?: JsonScanOptions): LazyDataFrame; | ||
* __Read a JSON file or string into a DataFrame.__ | ||
* | ||
* _Note: Currently only newline delimited JSON is supported_ | ||
* @param path - path to json file | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `./file.json`. | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.nThreads - Maximum number of threads to use when reading json. | ||
* @param options.lowMemory - Reduce memory usage in expense of performance. | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.numRows Stop reading from parquet file after reading ``numRows``. | ||
* @param options.skipRows -Start reading after ``skipRows`` position. | ||
* @param options.rowCount Add row count as column | ||
* @returns ({@link DataFrame}) | ||
* @example | ||
* ``` | ||
* > const df = pl.scanJson('path/to/file.json', {numRows: 2}).collectSync() | ||
* > console.log(df) | ||
* shape: (2, 3) | ||
* ╭─────┬─────┬─────╮ | ||
* │ a ┆ b ┆ c │ | ||
* │ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ str ┆ i64 │ | ||
* ╞═════╪═════╪═════╡ | ||
* │ 1 ┆ foo ┆ 3 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ bar ┆ 6 │ | ||
* ╰─────┴─────┴─────╯ | ||
* ``` | ||
*/ | ||
export declare function scanJson(path: string, options?: Partial<ScanJsonOptions>): LazyDataFrame; | ||
interface ReadParquetOptions { | ||
columns?: string[] | number[]; | ||
numRows?: number; | ||
parallel?: "auto" | "columns" | "row_groups" | "none"; | ||
rowCount?: RowCount; | ||
columns: string[] | number[]; | ||
numRows: number; | ||
parallel: "auto" | "columns" | "row_groups" | "none"; | ||
rowCount: RowCount; | ||
} | ||
@@ -174,3 +225,8 @@ /** | ||
*/ | ||
export declare function readParquet(pathOrBody: string | Buffer, options?: ReadParquetOptions): DataFrame; | ||
export declare function readParquet(pathOrBody: string | Buffer, options?: Partial<ReadParquetOptions>): DataFrame; | ||
export interface ReadAvroOptions { | ||
columns: string[] | Array<string> | number[]; | ||
projection: number; | ||
nRows: number; | ||
} | ||
/** | ||
@@ -183,5 +239,5 @@ * Read into a DataFrame from an avro file. | ||
* @param options.projection -Indices of columns to select. Note that column indices start at zero. | ||
* @param options.nRows Stop reading from avro file after reading ``n_rows``. | ||
* @param options.nRows Stop reading from avro file after reading ``nRows``. | ||
*/ | ||
export declare function readAvro(pathOrBody: string | Buffer, options?: any): DataFrame; | ||
export declare function readAvro(pathOrBody: string | Buffer, options?: Partial<ReadAvroOptions>): DataFrame; | ||
interface RowCount { | ||
@@ -200,134 +256,143 @@ name: string; | ||
/** | ||
* __Lazily read from a parquet file or multiple files via glob patterns.__ | ||
* ___ | ||
* This allows the query optimizer to push down predicates and projections to the scan level, | ||
* thereby potentially reducing memory overhead. | ||
* @param path Path to a file or or glob pattern | ||
* @param options.numRows Stop reading from parquet file after reading ``numRows``. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.parallel Read the parquet file in parallel. The single threaded reader consumes less memory. | ||
* @param options.rechunk In case of reading multiple files via a glob pattern rechunk the final DataFrame into contiguous memory chunks. | ||
*/ | ||
* __Lazily read from a parquet file or multiple files via glob patterns.__ | ||
* ___ | ||
* This allows the query optimizer to push down predicates and projections to the scan level, | ||
* thereby potentially reducing memory overhead. | ||
* @param path Path to a file or or glob pattern | ||
* @param options.numRows Stop reading from parquet file after reading ``numRows``. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.parallel Read the parquet file in parallel. The single threaded reader consumes less memory. | ||
* @param options.rechunk In case of reading multiple files via a glob pattern rechunk the final DataFrame into contiguous memory chunks. | ||
*/ | ||
export declare function scanParquet(path: string, options?: ScanParquetOptions): LazyDataFrame; | ||
export interface ReadIPCOptions { | ||
columns: string[] | number[]; | ||
nRows: number; | ||
} | ||
/** | ||
* __Read into a DataFrame from Arrow IPC (Feather v2) file.__ | ||
* ___ | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.ipc`. | ||
* - body: String or buffer to be read as Arrow IPC | ||
* @param options.columns Columns to select. Accepts a list of column names. | ||
* @param options.numRows Stop reading from parquet file after reading ``n_rows``. | ||
*/ | ||
export declare function readIPC(pathOrBody: string | Buffer, options?: any): DataFrame; | ||
* __Read into a DataFrame from Arrow IPC (Feather v2) file.__ | ||
* ___ | ||
* @param pathOrBody - path or buffer or string | ||
* - path: Path to a file or a file like string. Any valid filepath can be used. Example: `file.ipc`. | ||
* - body: String or buffer to be read as Arrow IPC | ||
* @param options.columns Columns to select. Accepts a list of column names. | ||
* @param options.nRows Stop reading from parquet file after reading ``nRows``. | ||
*/ | ||
export declare function readIPC(pathOrBody: string | Buffer, options?: Partial<ReadIPCOptions>): DataFrame; | ||
export interface ScanIPCOptions { | ||
nRows: number; | ||
cache: boolean; | ||
rechunk: boolean; | ||
} | ||
/** | ||
* __Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.__ | ||
* ___ | ||
* @param path Path to a IPC file. | ||
* @param options.numRows Stop reading from IPC file after reading ``numRows`` | ||
* @param options.cache Cache the result after reading. | ||
* @param options.rechunk Reallocate to contiguous memory when all chunks/ files are parsed. | ||
*/ | ||
export declare function scanIPC(path: string, options?: any): LazyDataFrame; | ||
* __Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.__ | ||
* ___ | ||
* @param path Path to a IPC file. | ||
* @param options.nRows Stop reading from IPC file after reading ``nRows`` | ||
* @param options.cache Cache the result after reading. | ||
* @param options.rechunk Reallocate to contiguous memory when all chunks/ files are parsed. | ||
*/ | ||
export declare function scanIPC(path: string, options?: Partial<ScanIPCOptions>): LazyDataFrame; | ||
/** | ||
* __Read a stream into a Dataframe.__ | ||
* | ||
* **Warning:** this is much slower than `scanCSV` or `readCSV` | ||
* | ||
* This will consume the entire stream into a single buffer and then call `readCSV` | ||
* Only use it when you must consume from a stream, or when performance is not a major consideration | ||
* | ||
* ___ | ||
* @param stream - readable stream containing csv data | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to False first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.endRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.startRows -Start reading after `startRows` position. | ||
* @param options.projection -Indices of columns to select. Note that column indices start at zero. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.columns -Columns to select. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.encoding -Allowed encodings: `utf8`, `utf8-lossy`. Lossy means that invalid utf8 values are replaced with `�` character. | ||
* @param options.numThreads -Number of threads to use in csv parsing. Defaults to the number of physical cpu's of your system. | ||
* @param options.dtype -Overwrite the dtypes during inference. | ||
* @param options.lowMemory - Reduce memory usage in expense of performance. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quotChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
* - `Record<string,string>` -> An object or map that maps column name to a null value string.Ex. {"column_1": 0} | ||
* @param options.parseDates -Whether to attempt to parse dates or not | ||
* @returns Promise<DataFrame> | ||
* | ||
* @example | ||
* ``` | ||
* >>> const readStream = new Stream.Readable({read(){}}); | ||
* >>> readStream.push(`a,b\n`); | ||
* >>> readStream.push(`1,2\n`); | ||
* >>> readStream.push(`2,2\n`); | ||
* >>> readStream.push(`3,2\n`); | ||
* >>> readStream.push(`4,2\n`); | ||
* >>> readStream.push(null); | ||
* | ||
* >>> pl.readCSVStream(readStream).then(df => console.log(df)); | ||
* shape: (4, 2) | ||
* ┌─────┬─────┐ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ i64 │ | ||
* ╞═════╪═════╡ | ||
* │ 1 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 3 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 4 ┆ 2 │ | ||
* └─────┴─────┘ | ||
* ``` | ||
*/ | ||
export declare function readCSVStream(stream: Readable, options?: any): Promise<DataFrame>; | ||
* __Read a stream into a Dataframe.__ | ||
* | ||
* **Warning:** this is much slower than `scanCSV` or `readCSV` | ||
* | ||
* This will consume the entire stream into a single buffer and then call `readCSV` | ||
* Only use it when you must consume from a stream, or when performance is not a major consideration | ||
* | ||
* ___ | ||
* @param stream - readable stream containing csv data | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @param options.hasHeader - Indicate if first row of dataset is header or not. If set to False first row will be set to `column_x`, | ||
* `x` being an enumeration over every column in the dataset. | ||
* @param options.ignoreErrors -Try to keep reading lines if some lines yield errors. | ||
* @param options.endRows -After n rows are read from the CSV, it stops reading. | ||
* During multi-threaded parsing, an upper bound of `n` rows | ||
* cannot be guaranteed. | ||
* @param options.startRows -Start reading after `startRows` position. | ||
* @param options.projection -Indices of columns to select. Note that column indices start at zero. | ||
* @param options.sep -Character to use as delimiter in the file. | ||
* @param options.columns -Columns to select. | ||
* @param options.rechunk -Make sure that all columns are contiguous in memory by aggregating the chunks into a single array. | ||
* @param options.encoding -Allowed encodings: `utf8`, `utf8-lossy`. Lossy means that invalid utf8 values are replaced with `�` character. | ||
* @param options.numThreads -Number of threads to use in csv parsing. Defaults to the number of physical cpu's of your system. | ||
* @param options.dtype -Overwrite the dtypes during inference. | ||
 * @param options.lowMemory - Reduce memory usage at the expense of performance. | ||
* @param options.commentChar - character that indicates the start of a comment line, for instance '#'. | ||
* @param options.quotChar -character that is used for csv quoting, default = ''. Set to null to turn special handling and escaping of quotes off. | ||
* @param options.nullValues - Values to interpret as null values. You can provide a | ||
* - `string` -> all values encountered equal to this string will be null | ||
* - `Array<string>` -> A null value per column. | ||
 * - `Record<string,string>` -> An object or map that maps column name to a null value string. Ex. {"column_1": 0} | ||
* @param options.parseDates -Whether to attempt to parse dates or not | ||
* @returns Promise<DataFrame> | ||
* | ||
* @example | ||
* ``` | ||
* >>> const readStream = new Stream.Readable({read(){}}); | ||
* >>> readStream.push(`a,b\n`); | ||
* >>> readStream.push(`1,2\n`); | ||
* >>> readStream.push(`2,2\n`); | ||
* >>> readStream.push(`3,2\n`); | ||
* >>> readStream.push(`4,2\n`); | ||
* >>> readStream.push(null); | ||
* | ||
* >>> pl.readCSVStream(readStream).then(df => console.log(df)); | ||
* shape: (4, 2) | ||
* ┌─────┬─────┐ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ i64 │ | ||
* ╞═════╪═════╡ | ||
* │ 1 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 3 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 4 ┆ 2 │ | ||
* └─────┴─────┘ | ||
* ``` | ||
*/ | ||
export declare function readCSVStream(stream: Readable, options?: Partial<ReadCsvOptions>): Promise<DataFrame>; | ||
/** | ||
* __Read a newline delimited JSON stream into a DataFrame.__ | ||
* | ||
* @param stream - readable stream containing json data | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* Note: this is done per batch | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @example | ||
* ``` | ||
* >>> const readStream = new Stream.Readable({read(){}}); | ||
* >>> readStream.push(`${JSON.stringify({a: 1, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 2, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 3, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 4, b: 2})} \n`); | ||
* >>> readStream.push(null); | ||
* | ||
* >>> pl.readJSONStream(readStream).then(df => console.log(df)); | ||
* shape: (4, 2) | ||
* ┌─────┬─────┐ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ i64 │ | ||
* ╞═════╪═════╡ | ||
* │ 1 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 3 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 4 ┆ 2 │ | ||
* └─────┴─────┘ | ||
* ``` | ||
*/ | ||
export declare function readJSONStream(stream: Readable, options?: any): Promise<DataFrame>; | ||
* __Read a newline delimited JSON stream into a DataFrame.__ | ||
* | ||
* @param stream - readable stream containing json data | ||
* @param options | ||
* @param options.inferSchemaLength -Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8. | ||
* If set to `null`, a full table scan will be done (slow). | ||
* Note: this is done per batch | ||
* @param options.batchSize - Number of lines to read into the buffer at once. Modify this to change performance. | ||
* @example | ||
* ``` | ||
* >>> const readStream = new Stream.Readable({read(){}}); | ||
* >>> readStream.push(`${JSON.stringify({a: 1, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 2, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 3, b: 2})} \n`); | ||
* >>> readStream.push(`${JSON.stringify({a: 4, b: 2})} \n`); | ||
* >>> readStream.push(null); | ||
* | ||
* >>> pl.readJSONStream(readStream).then(df => console.log(df)); | ||
* shape: (4, 2) | ||
* ┌─────┬─────┐ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ i64 │ | ||
* ╞═════╪═════╡ | ||
* │ 1 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 2 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 3 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┤ | ||
* │ 4 ┆ 2 │ | ||
* └─────┴─────┘ | ||
* ``` | ||
*/ | ||
export declare function readJSONStream(stream: Readable, options?: Partial<ReadJsonOptions>): Promise<DataFrame>; | ||
export {}; |
@@ -36,3 +36,3 @@ "use strict"; | ||
parseDates: false, | ||
skipRowsAfterHeader: 0 | ||
skipRowsAfterHeader: 0, | ||
}; | ||
@@ -42,3 +42,3 @@ const readJsonDefaultOptions = { | ||
inferSchemaLength: 50, | ||
format: "lines" | ||
format: "lines", | ||
}; | ||
@@ -62,6 +62,6 @@ // utility to read streams as lines. | ||
while (i < chunk.length) { | ||
if (chunk[i] === 10) { // '\n' | ||
__classPrivateFieldSet(this, _LineBatcher_accumulatedLines, // '\n' | ||
(_a = __classPrivateFieldGet(this, _LineBatcher_accumulatedLines, "f"), _a++, _a), "f"); | ||
if (__classPrivateFieldGet(this, _LineBatcher_accumulatedLines, "f") == __classPrivateFieldGet(this, _LineBatcher_batchSize, "f")) { | ||
if (chunk[i] === 10) { | ||
// '\n' | ||
__classPrivateFieldSet(this, _LineBatcher_accumulatedLines, (_a = __classPrivateFieldGet(this, _LineBatcher_accumulatedLines, "f"), _a++, _a), "f"); | ||
if (__classPrivateFieldGet(this, _LineBatcher_accumulatedLines, "f") === __classPrivateFieldGet(this, _LineBatcher_batchSize, "f")) { | ||
__classPrivateFieldGet(this, _LineBatcher_lines, "f").push(chunk.subarray(begin, i + 1)); | ||
@@ -99,5 +99,6 @@ this.push(Buffer.concat(__classPrivateFieldGet(this, _LineBatcher_lines, "f"))); | ||
function readCSV(pathOrBody, options) { | ||
options = { ...readCsvDefaultOptions, ...options }; | ||
const extensions = [".tsv", ".csv"]; | ||
if (Buffer.isBuffer(pathOrBody)) { | ||
return (0, dataframe_1._DataFrame)(polars_internal_1.default.readCsv(pathOrBody, { ...readCsvDefaultOptions, ...options })); | ||
return (0, dataframe_1._DataFrame)(polars_internal_1.default.readCsv(pathOrBody, options)); | ||
} | ||
@@ -108,6 +109,6 @@ if (typeof pathOrBody === "string") { | ||
const buf = Buffer.from(pathOrBody, "utf-8"); | ||
return (0, dataframe_1._DataFrame)(polars_internal_1.default.readCsv(buf, { ...readCsvDefaultOptions, ...options })); | ||
return (0, dataframe_1._DataFrame)(polars_internal_1.default.readCsv(buf, options)); | ||
} | ||
else { | ||
return (0, dataframe_1._DataFrame)(polars_internal_1.default.readCsv(pathOrBody, { ...readCsvDefaultOptions, ...options })); | ||
return (0, dataframe_1._DataFrame)(polars_internal_1.default.readCsv(pathOrBody, options)); | ||
} | ||
@@ -131,3 +132,3 @@ } | ||
parseDates: false, | ||
skipRowsAfterHeader: 0 | ||
skipRowsAfterHeader: 0, | ||
}; | ||
@@ -225,12 +226,12 @@ function scanCSV(path, options) { | ||
/** | ||
* __Lazily read from a parquet file or multiple files via glob patterns.__ | ||
* ___ | ||
* This allows the query optimizer to push down predicates and projections to the scan level, | ||
* thereby potentially reducing memory overhead. | ||
 * @param path Path to a file or glob pattern | ||
* @param options.numRows Stop reading from parquet file after reading ``numRows``. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.parallel Read the parquet file in parallel. The single threaded reader consumes less memory. | ||
* @param options.rechunk In case of reading multiple files via a glob pattern rechunk the final DataFrame into contiguous memory chunks. | ||
*/ | ||
* __Lazily read from a parquet file or multiple files via glob patterns.__ | ||
* ___ | ||
* This allows the query optimizer to push down predicates and projections to the scan level, | ||
* thereby potentially reducing memory overhead. | ||
 * @param path Path to a file or glob pattern | ||
* @param options.numRows Stop reading from parquet file after reading ``numRows``. | ||
* @param options.cache Cache the result after reading. | ||
* @param options.parallel Read the parquet file in parallel. The single threaded reader consumes less memory. | ||
* @param options.rechunk In case of reading multiple files via a glob pattern rechunk the final DataFrame into contiguous memory chunks. | ||
*/ | ||
function scanParquet(path, options = {}) { | ||
@@ -237,0 +238,0 @@ const pliOptions = {}; |
@@ -6,17 +6,4 @@ import { DataFrame } from "../dataframe"; | ||
import { Deserialize, GroupByOps, Serialize } from "../shared_traits"; | ||
import { LazyOptions, LazyJoinOptions } from "../types"; | ||
declare const inspect: unique symbol; | ||
declare type LazyJoinOptions = { | ||
how?: "left" | "inner" | "outer" | "cross"; | ||
suffix?: string; | ||
allowParallel?: boolean; | ||
forceParallel?: boolean; | ||
}; | ||
declare type LazyOptions = { | ||
typeCoercion?: boolean; | ||
predicatePushdown?: boolean; | ||
projectionPushdown?: boolean; | ||
simplifyExpression?: boolean; | ||
stringCache?: boolean; | ||
noOptimization?: boolean; | ||
}; | ||
/** | ||
@@ -126,9 +113,9 @@ * Representation of a Lazy computation graph / query. | ||
* ``` | ||
* >>> lf = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }).lazy() | ||
* >>> // Filter on one condition | ||
* >>> lf.filter(pl.col("foo").lt(3)).collect() | ||
* > lf = pl.DataFrame({ | ||
* > "foo": [1, 2, 3], | ||
* > "bar": [6, 7, 8], | ||
* > "ham": ['a', 'b', 'c'] | ||
* > }).lazy() | ||
* > // Filter on one condition | ||
* > lf.filter(pl.col("foo").lt(3)).collect() | ||
* shape: (2, 3) | ||
@@ -166,3 +153,37 @@ * ┌─────┬─────┬─────┐ | ||
/** | ||
* Add a join operation to the Logical Plan. | ||
* __SQL like joins.__ | ||
* @param df - DataFrame to join with. | ||
* @param options | ||
* @param options.leftOn - Name(s) of the left join column(s). | ||
* @param options.rightOn - Name(s) of the right join column(s). | ||
* @param options.on - Name(s) of the join columns in both DataFrames. | ||
* @param options.how - Join strategy | ||
* @param options.suffix - Suffix to append to columns with a duplicate name. | ||
* @param options.allowParallel - Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel. | ||
* @param options.forceParallel - Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel. | ||
* @see {@link LazyJoinOptions} | ||
* @example | ||
* ``` | ||
* >>> const df = pl.DataFrame({ | ||
* >>> foo: [1, 2, 3], | ||
* >>> bar: [6.0, 7.0, 8.0], | ||
* >>> ham: ['a', 'b', 'c'], | ||
* >>> }).lazy() | ||
* >>> | ||
* >>> const otherDF = pl.DataFrame({ | ||
* >>> apple: ['x', 'y', 'z'], | ||
* >>> ham: ['a', 'b', 'd'], | ||
* >>> }).lazy(); | ||
* >>> const result = await df.join(otherDF, { on: 'ham', how: 'inner' }).collect(); | ||
* shape: (2, 4) | ||
* ╭─────┬─────┬─────┬───────╮ | ||
* │ foo ┆ bar ┆ ham ┆ apple │ | ||
* │ --- ┆ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ f64 ┆ str ┆ str │ | ||
* ╞═════╪═════╪═════╪═══════╡ | ||
* │ 1 ┆ 6 ┆ "a" ┆ "x" │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤ | ||
* │ 2 ┆ 7 ┆ "b" ┆ "y" │ | ||
* ╰─────┴─────┴─────┴───────╯ | ||
* ``` | ||
*/ | ||
@@ -233,3 +254,3 @@ join(other: LazyDataFrame, joinOptions: { | ||
``` | ||
>>> const gdp = pl.DataFrame({ | ||
>const gdp = pl.DataFrame({ | ||
... date: [ | ||
@@ -243,3 +264,3 @@ ... new Date('2016-01-01'), | ||
... }) | ||
>>> const population = pl.DataFrame({ | ||
>const population = pl.DataFrame({ | ||
... date: [ | ||
@@ -253,3 +274,3 @@ ... new Date('2016-05-12'), | ||
... }) | ||
>>> population.joinAsof( | ||
>population.joinAsof( | ||
... gdp, | ||
@@ -417,2 +438,3 @@ ... {leftOn:"date", rightOn:"date", strategy:"backward"} | ||
} | ||
/** @ignore */ | ||
export declare const _LazyDataFrame: (_ldf: any) => LazyDataFrame; | ||
@@ -422,3 +444,4 @@ export interface LazyDataFrameConstructor extends Deserialize<LazyDataFrame> { | ||
} | ||
/** @ignore */ | ||
export declare const LazyDataFrame: LazyDataFrameConstructor; | ||
export {}; |
@@ -34,2 +34,3 @@ "use strict"; | ||
}; | ||
/** @ignore */ | ||
const _LazyDataFrame = (_ldf) => { | ||
@@ -139,6 +140,6 @@ const unwrap = (method, ...args) => { | ||
const by = (0, utils_1.selectionToExprList)([opt.by], false); | ||
return (0, groupby_1.LazyGroupBy)(_ldf.groupby(by, opt.maintainOrder)); | ||
return (0, groupby_1._LazyGroupBy)(_ldf.groupby(by, opt.maintainOrder)); | ||
} | ||
const by = (0, utils_1.selectionToExprList)([opt], false); | ||
return (0, groupby_1.LazyGroupBy)(_ldf.groupby(by, maintainOrder)); | ||
return (0, groupby_1._LazyGroupBy)(_ldf.groupby(by, maintainOrder)); | ||
}, | ||
@@ -150,3 +151,3 @@ groupByRolling({ indexColumn, by, period, offset, closed }) { | ||
const lgb = _ldf.groupbyRolling(indexColumn, period, offset, closed, by); | ||
return (0, groupby_1.LazyGroupBy)(lgb); | ||
return (0, groupby_1._LazyGroupBy)(lgb); | ||
}, | ||
@@ -161,3 +162,3 @@ groupByDynamic({ indexColumn, every, period, offset, truncate, includeBoundaries, closed, by, }) { | ||
const lgb = _ldf.groupbyDynamic(indexColumn, every, period, offset, truncate, includeBoundaries, closed, by); | ||
return (0, groupby_1.LazyGroupBy)(lgb); | ||
return (0, groupby_1._LazyGroupBy)(lgb); | ||
}, | ||
@@ -208,3 +209,3 @@ head(len = 5) { | ||
let rightOn; | ||
if (!(other?._ldf)) { | ||
if (!other?._ldf) { | ||
throw new TypeError("Expected a 'lazyFrame' as join table"); | ||
@@ -243,3 +244,4 @@ } | ||
} | ||
let toleranceStr, toleranceNum; | ||
let toleranceStr; | ||
let toleranceNum; | ||
if (typeof options.tolerance === "string") { | ||
@@ -361,2 +363,3 @@ toleranceStr = options.tolerance; | ||
exports._LazyDataFrame = _LazyDataFrame; | ||
/** @ignore */ | ||
exports.LazyDataFrame = Object.assign(exports._LazyDataFrame, { | ||
@@ -363,0 +366,0 @@ deserialize: (buf, fmt) => (0, exports._LazyDataFrame)(polars_internal_1.default.JsLazyFrame.deserialize(buf, fmt)), |
@@ -0,95 +1,8 @@ | ||
import { DateFunctions } from "../../shared_traits"; | ||
import { Expr } from "../expr"; | ||
export interface ExprDateTime { | ||
/** | ||
* Extract day from underlying Date representation. | ||
* Can be performed on Date and Datetime. | ||
* | ||
* Returns the day of month starting from 1. | ||
* The return value ranges from 1 to 31. (The last day of month differs by months.) | ||
* @returns day as pl.UInt32 | ||
*/ | ||
day(): Expr; | ||
/** | ||
* Extract hour from underlying DateTime representation. | ||
* Can be performed on Datetime. | ||
* | ||
* Returns the hour number from 0 to 23. | ||
* @returns Hour as UInt32 | ||
*/ | ||
hour(): Expr; | ||
/** | ||
* Extract minutes from underlying DateTime representation. | ||
* Can be performed on Datetime. | ||
* | ||
* Returns the minute number from 0 to 59. | ||
* @returns minute as UInt32 | ||
*/ | ||
minute(): Expr; | ||
/** | ||
* Extract month from underlying Date representation. | ||
* Can be performed on Date and Datetime. | ||
* | ||
* Returns the month number starting from 1. | ||
* The return value ranges from 1 to 12. | ||
* @returns Month as UInt32 | ||
*/ | ||
month(): Expr; | ||
/** | ||
* Extract seconds from underlying DateTime representation. | ||
* Can be performed on Datetime. | ||
* | ||
* Returns the number of nanoseconds since the whole non-leap second. | ||
* The range from 1,000,000,000 to 1,999,999,999 represents the leap second. | ||
* @returns Nanosecond as UInt32 | ||
*/ | ||
nanosecond(): Expr; | ||
/** | ||
* Extract ordinal day from underlying Date representation. | ||
* Can be performed on Date and Datetime. | ||
* | ||
* Returns the day of year starting from 1. | ||
* The return value ranges from 1 to 366. (The last day of year differs by years.) | ||
* @returns Day as UInt32 | ||
*/ | ||
ordinalDay(): Expr; | ||
/** | ||
* Extract seconds from underlying DateTime representation. | ||
* Can be performed on Datetime. | ||
* | ||
* Returns the second number from 0 to 59. | ||
* @returns Second as UInt32 | ||
*/ | ||
second(): Expr; | ||
/** | ||
* Format Date/datetime with a formatting rule: See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html). | ||
*/ | ||
strftime(fmt: string): Expr; | ||
/** Return timestamp in ms as Int64 type. */ | ||
timestamp(): Expr; | ||
/** | ||
* Extract the week from the underlying Date representation. | ||
* Can be performed on Date and Datetime | ||
* | ||
* Returns the ISO week number starting from 1. | ||
* The return value ranges from 1 to 53. (The last week of year differs by years.) | ||
* @returns Week number as UInt32 | ||
*/ | ||
week(): Expr; | ||
/** | ||
* Extract the week day from the underlying Date representation. | ||
* Can be performed on Date and Datetime. | ||
* | ||
* Returns the weekday number where monday = 0 and sunday = 6 | ||
* @returns Week day as UInt32 | ||
*/ | ||
weekday(): Expr; | ||
/** | ||
* Extract year from underlying Date representation. | ||
* Can be performed on Date and Datetime. | ||
* | ||
* Returns the year number in the calendar date. | ||
* @returns Year as Int32 | ||
*/ | ||
year(): Expr; | ||
/** | ||
* DateTime functions | ||
*/ | ||
export interface ExprDateTime extends DateFunctions<Expr> { | ||
} | ||
export declare const ExprDateTimeFunctions: (_expr: any) => ExprDateTime; |
@@ -5,12 +5,586 @@ import * as dt from "./datetime"; | ||
import * as struct from "./struct"; | ||
declare namespace expr { | ||
export import DateTimeFunctions = dt.ExprDateTimeFunctions; | ||
export import ListFunctions = lst.ExprListFunctions; | ||
export import StringFunctions = str.ExprStringFunctions; | ||
export import StructFunctions = struct.ExprStructFunctions; | ||
export import List = lst.ExprList; | ||
export import Datetime = dt.ExprDateTime; | ||
export import String = str.ExprString; | ||
export import Struct = struct.ExprStruct; | ||
export type { StringNamespace } from "./string"; | ||
export type { ExprList as ListNamespace } from "./list"; | ||
export type { ExprDateTime as DatetimeNamespace } from "./datetime"; | ||
export type { ExprStruct as StructNamespace } from "./struct"; | ||
import { DataType } from "../../datatypes"; | ||
import { ExprOrString, INSPECT_SYMBOL } from "../../utils"; | ||
import { Series } from "../../series"; | ||
import { Arithmetic, Comparison, Cumulative, Deserialize, Rolling, Round, Sample, Serialize } from "../../shared_traits"; | ||
import { FillNullStrategy, RankMethod } from "../../types"; | ||
/** | ||
* Expressions that can be used in various contexts. | ||
*/ | ||
export interface Expr extends Rolling<Expr>, Arithmetic<Expr>, Comparison<Expr>, Cumulative<Expr>, Sample<Expr>, Round<Expr>, Serialize { | ||
/** @ignore */ | ||
_expr: any; | ||
/** | ||
* Datetime namespace | ||
*/ | ||
get date(): dt.ExprDateTime; | ||
/** | ||
* String namespace | ||
*/ | ||
get str(): str.StringNamespace; | ||
/** | ||
* List namespace | ||
*/ | ||
get lst(): lst.ExprList; | ||
/** | ||
* Struct namespace | ||
*/ | ||
get struct(): struct.ExprStruct; | ||
[Symbol.toStringTag](): string; | ||
[INSPECT_SYMBOL](): string; | ||
toString(): string; | ||
/** compat with `JSON.stringify` */ | ||
toJSON(): string; | ||
/** Take absolute values */ | ||
abs(): Expr; | ||
aggGroups(): Expr; | ||
/** | ||
* Rename the output of an expression. | ||
* @param name new name | ||
* @see {@link Expr.as} | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... "a": [1, 2, 3], | ||
* ... "b": ["a", "b", None], | ||
* ... }) | ||
* > df | ||
* shape: (3, 2) | ||
* ╭─────┬──────╮ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ str │ | ||
* ╞═════╪══════╡ | ||
* │ 1 ┆ "a" │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 2 ┆ "b" │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 3 ┆ null │ | ||
* ╰─────┴──────╯ | ||
* > df.select([ | ||
* ... pl.col("a").alias("bar"), | ||
* ... pl.col("b").alias("foo"), | ||
* ... ]) | ||
* shape: (3, 2) | ||
* ╭─────┬──────╮ | ||
* │ bar ┆ foo │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ str │ | ||
* ╞═════╪══════╡ | ||
* │ 1 ┆ "a" │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 2 ┆ "b" │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 3 ┆ null │ | ||
* ╰─────┴──────╯ | ||
*``` | ||
*/ | ||
alias(name: string): Expr; | ||
and(other: any): Expr; | ||
/** Get the index of the maximal value. */ | ||
argMax(): Expr; | ||
/** Get the index of the minimal value. */ | ||
argMin(): Expr; | ||
/** | ||
* Get the index values that would sort this column. | ||
* @param reverse | ||
* - false -> order from small to large. | ||
* - true -> order from large to small. | ||
* @returns UInt32 Series | ||
*/ | ||
argSort(reverse?: boolean): Expr; | ||
argSort({ reverse }: { | ||
reverse: boolean; | ||
}): Expr; | ||
/** Get index of first unique value. */ | ||
argUnique(): Expr; | ||
/** @see {@link Expr.alias} */ | ||
as(name: string): Expr; | ||
/** Fill missing values with the next to be seen values */ | ||
backwardFill(): Expr; | ||
/** Cast between data types. */ | ||
cast(dtype: DataType, strict?: boolean): Expr; | ||
/** Count the number of values in this expression */ | ||
count(): Expr; | ||
/** Calculate the n-th discrete difference. | ||
* | ||
* @param n number of slots to shift | ||
* @param nullBehavior ignore or drop | ||
*/ | ||
diff(n: number, nullBehavior: "ignore" | "drop"): Expr; | ||
diff(o: { | ||
n: number; | ||
nullBehavior: "ignore" | "drop"; | ||
}): Expr; | ||
/** | ||
* Compute the dot/inner product between two Expressions | ||
* @param other Expression to compute dot product with | ||
*/ | ||
dot(other: any): Expr; | ||
/** | ||
* Exclude certain columns from a wildcard/regex selection. | ||
* | ||
* You may also use regexes in the exclude list. They must start with `^` and end with `$`. | ||
* | ||
* @param columns Column(s) to exclude from selection | ||
* @example | ||
* ``` | ||
* >df = pl.DataFrame({ | ||
* ... "a": [1, 2, 3], | ||
* ... "b": ["a", "b", None], | ||
* ... "c": [None, 2, 1], | ||
* ...}) | ||
* >df | ||
* shape: (3, 3) | ||
* ╭─────┬──────┬──────╮ | ||
* │ a ┆ b ┆ c │ | ||
* │ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ str ┆ i64 │ | ||
* ╞═════╪══════╪══════╡ | ||
* │ 1 ┆ "a" ┆ null │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 2 ┆ "b" ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 3 ┆ null ┆ 1 │ | ||
* ╰─────┴──────┴──────╯ | ||
* >df.select( | ||
* ... pl.col("*").exclude("b"), | ||
* ... ) | ||
* shape: (3, 2) | ||
* ╭─────┬──────╮ | ||
* │ a ┆ c │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ i64 │ | ||
* ╞═════╪══════╡ | ||
* │ 1 ┆ null │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 2 ┆ 2 │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌┤ | ||
* │ 3 ┆ 1 │ | ||
* ╰─────┴──────╯ | ||
* ``` | ||
*/ | ||
exclude(column: string, ...columns: string[]): Expr; | ||
/** | ||
* Explode a list or utf8 Series. | ||
* | ||
* This means that every item is expanded to a new row. | ||
*/ | ||
explode(): Expr; | ||
/** | ||
* Extend the Series with given number of values. | ||
* @param value The value to extend the Series with. This value may be null to fill with nulls. | ||
* @param n The number of values to extend. | ||
* @deprecated | ||
* @see {@link extendConstant} | ||
*/ | ||
extend(value: any, n: number): Expr; | ||
extend(opt: { | ||
value: any; | ||
n: number; | ||
}): Expr; | ||
/** | ||
* Extend the Series with given number of values. | ||
* @param value The value to extend the Series with. This value may be null to fill with nulls. | ||
* @param n The number of values to extend. | ||
*/ | ||
extendConstant(value: any, n: number): Expr; | ||
extendConstant(opt: { | ||
value: any; | ||
n: number; | ||
}): Expr; | ||
/** Fill nan value with a fill value */ | ||
fillNan(other: any): Expr; | ||
/** Fill null value with a fill value or strategy */ | ||
fillNull(other: any | FillNullStrategy): Expr; | ||
/** | ||
* Filter a single column. | ||
* | ||
* Mostly useful in in aggregation context. | ||
* If you want to filter on a DataFrame level, use `LazyFrame.filter`. | ||
* @param predicate Boolean expression. | ||
*/ | ||
filter(predicate: Expr): Expr; | ||
/** Get the first value. */ | ||
first(): Expr; | ||
/** @see {@link Expr.explode} */ | ||
flatten(): Expr; | ||
/** Fill missing values with the latest seen values */ | ||
forwardFill(): Expr; | ||
/** Hash the Series. */ | ||
hash(k0?: number, k1?: number, k2?: number, k3?: number): Expr; | ||
hash({ k0, k1, k2, k3, }: { | ||
k0?: number; | ||
k1?: number; | ||
k2?: number; | ||
k3?: number; | ||
}): Expr; | ||
/** Take the first n values. */ | ||
head(length?: number): Expr; | ||
head({ length }: { | ||
length: number; | ||
}): Expr; | ||
inner(): any; | ||
/** Interpolate intermediate values. The interpolation method is linear. */ | ||
interpolate(): Expr; | ||
/** Get mask of duplicated values. */ | ||
isDuplicated(): Expr; | ||
/** Create a boolean expression returning `true` where the expression values are finite. */ | ||
isFinite(): Expr; | ||
/** Get a mask of the first unique value. */ | ||
isFirst(): Expr; | ||
/** | ||
* Check if elements of this Series are in the right Series, or List values of the right Series. | ||
* | ||
* @param other Series of primitive type or List type. | ||
* @returns Expr that evaluates to a Boolean Series. | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... "sets": [[1, 2, 3], [1, 2], [9, 10]], | ||
* ... "optional_members": [1, 2, 3] | ||
* ... }) | ||
* > df.select( | ||
* ... pl.col("optional_members").isIn("sets").alias("contains") | ||
* ... ) | ||
* shape: (3, 1) | ||
* ┌──────────┐ | ||
* │ contains │ | ||
* │ --- │ | ||
* │ bool │ | ||
* ╞══════════╡ | ||
* │ true │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ true │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ false │ | ||
* └──────────┘ | ||
* ``` | ||
*/ | ||
isIn(other: any): Expr; | ||
/** Create a boolean expression returning `true` where the expression values are infinite. */ | ||
isInfinite(): Expr; | ||
/** Create a boolean expression returning `true` where the expression values are NaN (Not A Number). */ | ||
isNan(): Expr; | ||
/** Create a boolean expression returning `true` where the expression values are not NaN (Not A Number). */ | ||
isNotNan(): Expr; | ||
/** Create a boolean expression returning `true` where the expression does not contain null values. */ | ||
isNotNull(): Expr; | ||
/** Create a boolean expression returning `True` where the expression contains null values. */ | ||
isNull(): Expr; | ||
/** Get mask of unique values. */ | ||
isUnique(): Expr; | ||
/** | ||
* Keep the original root name of the expression. | ||
* | ||
* A groupby aggregation often changes the name of a column. | ||
* With `keepName` we can keep the original name of the column | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... "a": [1, 2, 3], | ||
* ... "b": ["a", "b", None], | ||
* ... }) | ||
* | ||
* > df | ||
* ... .groupBy("a") | ||
* ... .agg(pl.col("b").list()) | ||
* ... .sort({by:"a"}) | ||
* | ||
* shape: (3, 2) | ||
* ╭─────┬────────────╮ | ||
* │ a ┆ b_agg_list │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ list [str] │ | ||
* ╞═════╪════════════╡ | ||
* │ 1 ┆ [a] │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 2 ┆ [b] │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 3 ┆ [null] │ | ||
* ╰─────┴────────────╯ | ||
* | ||
* Keep the original column name: | ||
* | ||
* > df | ||
* ... .groupby("a") | ||
* ... .agg(col("b").list().keepName()) | ||
* ... .sort({by:"a"}) | ||
* | ||
* shape: (3, 2) | ||
* ╭─────┬────────────╮ | ||
* │ a ┆ b │ | ||
* │ --- ┆ --- │ | ||
* │ i64 ┆ list [str] │ | ||
* ╞═════╪════════════╡ | ||
* │ 1 ┆ [a] │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 2 ┆ [b] │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 3 ┆ [null] │ | ||
* ╰─────┴────────────╯ | ||
* ``` | ||
*/ | ||
keepName(): Expr; | ||
kurtosis(): Expr; | ||
kurtosis(fisher: boolean, bias?: boolean): Expr; | ||
kurtosis({ fisher, bias }: { | ||
fisher?: boolean; | ||
bias?: boolean; | ||
}): Expr; | ||
/** Get the last value. */ | ||
last(): Expr; | ||
/** Aggregate to list. */ | ||
list(): Expr; | ||
/** Returns a unit Series with the lowest value possible for the dtype of this expression. */ | ||
lowerBound(): Expr; | ||
/** Compute the max value of the arrays in the list */ | ||
max(): Expr; | ||
/** Compute the mean value of the arrays in the list */ | ||
mean(): Expr; | ||
/** Get median value. */ | ||
median(): Expr; | ||
/** Get minimum value. */ | ||
min(): Expr; | ||
/** Compute the most occurring value(s). Can return multiple Values */ | ||
mode(): Expr; | ||
/** Negate a boolean expression. */ | ||
not(): Expr; | ||
/** Count unique values. */ | ||
nUnique(): Expr; | ||
or(other: any): Expr; | ||
/** | ||
* Apply window function over a subgroup. | ||
* | ||
* This is similar to a groupby + aggregation + self join. | ||
* Or similar to [window functions in Postgres](https://www.postgresql.org/docs/9.1/tutorial-window.html) | ||
* @param partitionBy Column(s) to partition by. | ||
* | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... "groups": [1, 1, 2, 2, 1, 2, 3, 3, 1], | ||
* ... "values": [1, 2, 3, 4, 5, 6, 7, 8, 8], | ||
* ... }) | ||
* > df.select( | ||
* ... pl.col("groups").sum().over("groups") | ||
* ... ) | ||
* ╭────────┬────────╮ | ||
* │ groups ┆ values │ | ||
* │ --- ┆ --- │ | ||
* │ i32 ┆ i32 │ | ||
* ╞════════╪════════╡ | ||
* │ 1 ┆ 16 │ | ||
* ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤ | ||
* │ 1 ┆ 16 │ | ||
* ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤ | ||
* │ 2 ┆ 13 │ | ||
* ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤ | ||
* │ 2 ┆ 13 │ | ||
* ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤ | ||
* │ ... ┆ ... │ | ||
* ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤ | ||
* │ 1 ┆ 16 │ | ||
* ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤ | ||
* │ 2 ┆ 13 │ | ||
* ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤ | ||
* │ 3 ┆ 15 │ | ||
* ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤ | ||
* │ 3 ┆ 15 │ | ||
* ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤ | ||
* │ 1 ┆ 16 │ | ||
* ╰────────┴────────╯ | ||
* ``` | ||
*/ | ||
over(by: ExprOrString, ...partitionBy: ExprOrString[]): Expr; | ||
/** Raise expression to the power of exponent. */ | ||
pow(exponent: number): Expr; | ||
pow({ exponent }: { | ||
exponent: number; | ||
}): Expr; | ||
/** | ||
* Add a prefix the to root column name of the expression. | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... "A": [1, 2, 3, 4, 5], | ||
* ... "fruits": ["banana", "banana", "apple", "apple", "banana"], | ||
* ... "B": [5, 4, 3, 2, 1], | ||
* ... "cars": ["beetle", "audi", "beetle", "beetle", "beetle"], | ||
* ... }) | ||
* shape: (5, 4) | ||
* ╭─────┬──────────┬─────┬──────────╮ | ||
* │ A ┆ fruits ┆ B ┆ cars │ | ||
* │ --- ┆ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ str ┆ i64 ┆ str │ | ||
* ╞═════╪══════════╪═════╪══════════╡ | ||
* │ 1 ┆ "banana" ┆ 5 ┆ "beetle" │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 2 ┆ "banana" ┆ 4 ┆ "audi" │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 3 ┆ "apple" ┆ 3 ┆ "beetle" │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 4 ┆ "apple" ┆ 2 ┆ "beetle" │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 5 ┆ "banana" ┆ 1 ┆ "beetle" │ | ||
* ╰─────┴──────────┴─────┴──────────╯ | ||
* > df.select( | ||
* ... pl.all().reverse().prefix("reverse_"), | ||
* ... ) | ||
* shape: (5, 8) | ||
* ╭───────────┬────────────────┬───────────┬──────────────╮ | ||
* │ reverse_A ┆ reverse_fruits ┆ reverse_B ┆ reverse_cars │ | ||
* │ --- ┆ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ str ┆ i64 ┆ str │ | ||
* ╞═══════════╪════════════════╪═══════════╪══════════════╡ | ||
* │ 5 ┆ "banana" ┆ 1 ┆ "beetle" │ | ||
* ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 4 ┆ "apple" ┆ 2 ┆ "beetle" │ | ||
* ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 3 ┆ "apple" ┆ 3 ┆ "beetle" │ | ||
* ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 2 ┆ "banana" ┆ 4 ┆ "audi" │ | ||
* ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 1 ┆ "banana" ┆ 5 ┆ "beetle" │ | ||
* ╰───────────┴────────────────┴───────────┴──────────────╯ | ||
* ``` | ||
*/ | ||
prefix(prefix: string): Expr; | ||
/** Get quantile value. */ | ||
quantile(quantile: number | Expr): Expr; | ||
/** Assign ranks to data, dealing with ties appropriately. */ | ||
rank(method?: RankMethod): Expr; | ||
rank({ method }: { | ||
method: string; | ||
}): Expr; | ||
/**
 * Reinterpret the underlying bits of the column as the other 64-bit integer
 * flavour without changing the bits themselves.
 * @param signed - true => view as signed Int64 (default), false => as UInt64.
 *   NOTE(review): dtype restrictions are enforced by the native binding —
 *   presumably only valid for Int64/UInt64 columns; confirm.
 */
reinterpret(signed?: boolean): Expr; | ||
reinterpret({ signed }: { | ||
signed: boolean; | ||
}): Expr; | ||
/** | ||
* Repeat the elements in this Series `n` times by dictated by the number given by `by`. | ||
* The elements are expanded into a `List` | ||
* @param by Numeric column that determines how often the values will be repeated. | ||
* | ||
* The column will be coerced to UInt32. Give this dtype to make the coercion a no-op. | ||
*/ | ||
repeatBy(by: Expr | string): Expr; | ||
/** Reverse the arrays in the list */ | ||
reverse(): Expr; | ||
/** | ||
* Shift the values by a given period and fill the parts that will be empty due to this operation | ||
* @param periods number of places to shift (may be negative). | ||
*/ | ||
shift(periods?: number): Expr; | ||
shift({ periods }: { | ||
periods: number; | ||
}): Expr; | ||
/** | ||
* Shift the values by a given period and fill the parts that will be empty due to this operation | ||
* @param periods Number of places to shift (may be negative). | ||
* @param fillValue Fill null values with the result of this expression. | ||
*/ | ||
shiftAndFill(periods: number, fillValue: Expr): Expr; | ||
shiftAndFill({ periods, fillValue, }: { | ||
periods: number; | ||
fillValue: Expr; | ||
}): Expr; | ||
/** | ||
* Compute the sample skewness of a data set. | ||
* For normally distributed data, the skewness should be about zero. For | ||
* unimodal continuous distributions, a skewness value greater than zero means | ||
* that there is more weight in the right tail of the distribution. | ||
* ___ | ||
* @param bias If False, then the calculations are corrected for statistical bias. | ||
*/ | ||
skew(bias?: boolean): Expr; | ||
skew({ bias }: { | ||
bias: boolean; | ||
}): Expr; | ||
/** Slice the Series. */ | ||
slice(offset: number | Expr, length: number | Expr): Expr; | ||
slice({ offset, length, }: { | ||
offset: number | Expr; | ||
length: number | Expr; | ||
}): Expr; | ||
/** | ||
* Sort this column. In projection/ selection context the whole column is sorted. | ||
* @param reverse | ||
* * false -> order from small to large. | ||
* * true -> order from large to small. | ||
* @param nullsLast If true nulls are considered to be larger than any valid value | ||
*/ | ||
sort(reverse?: boolean, nullsLast?: boolean): Expr; | ||
sort({ reverse, nullsLast, }: { | ||
reverse?: boolean; | ||
nullsLast?: boolean; | ||
}): Expr; | ||
/** | ||
* Sort this column by the ordering of another column, or multiple other columns. | ||
In projection/ selection context the whole column is sorted. | ||
If used in a groupby context, the groups are sorted. | ||
Parameters | ||
---------- | ||
@param by | ||
The column(s) used for sorting. | ||
@param reverse | ||
false -> order from small to large. | ||
true -> order from large to small. | ||
*/ | ||
sortBy(by: ExprOrString[] | ExprOrString, reverse?: boolean | boolean[]): Expr; | ||
sortBy(options: { | ||
by: ExprOrString[] | ExprOrString; | ||
reverse?: boolean | boolean[]; | ||
}): Expr; | ||
/** Get standard deviation. */ | ||
std(): Expr; | ||
/** Add a suffix the to root column name of the expression. */ | ||
suffix(suffix: string): Expr; | ||
/** | ||
* Get sum value. | ||
* @note | ||
* Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues. | ||
*/ | ||
sum(): Expr; | ||
/** Take the last n values. */ | ||
tail(length?: number): Expr; | ||
tail({ length }: { | ||
length: number; | ||
}): Expr; | ||
/** | ||
* Take values by index. | ||
* @param index An expression that leads to a UInt32 dtyped Series. | ||
*/ | ||
take(index: Expr | number[] | Series): Expr; | ||
take({ index }: { | ||
index: Expr | number[] | Series; | ||
}): Expr; | ||
/** Take every nth value in the Series and return as a new Series. */ | ||
takeEvery(n: number): Expr; | ||
/** | ||
* Get the unique values of this expression; | ||
* @param maintainOrder Maintain order of data. This requires more work. | ||
*/ | ||
unique(maintainOrder?: boolean | { | ||
maintainOrder: boolean; | ||
}): Expr; | ||
/** Returns a unit Series with the highest value possible for the dtype of this expression. */ | ||
upperBound(): Expr; | ||
/** Get variance. */ | ||
var(): Expr; | ||
/** Alias for filter: @see {@link filter} */ | ||
where(predicate: Expr): Expr; | ||
} | ||
export = expr; | ||
/** @ignore */ | ||
export declare const _Expr: (_expr: any) => Expr; | ||
export interface ExprConstructor extends Deserialize<Expr> { | ||
isExpr(arg: any): arg is Expr; | ||
} | ||
export declare const Expr: ExprConstructor; | ||
/** @ignore */ | ||
export declare const exprToLitOrExpr: (expr: any, stringToLit?: boolean) => Expr; |
"use strict"; | ||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); | ||
var desc = Object.getOwnPropertyDescriptor(m, k); | ||
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { | ||
desc = { enumerable: true, get: function() { return m[k]; } }; | ||
} | ||
Object.defineProperty(o, k2, desc); | ||
}) : (function(o, m, k, k2) { | ||
@@ -21,2 +25,7 @@ if (k2 === undefined) k2 = k; | ||
}; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.exprToLitOrExpr = exports.Expr = exports._Expr = void 0; | ||
const dt = __importStar(require("./datetime")); | ||
@@ -26,9 +35,502 @@ const lst = __importStar(require("./list")); | ||
const struct = __importStar(require("./struct")); | ||
// Module export object (TypeScript-`namespace` transpilation target):
// aggregates the per-domain expression helper factories so consumers can
// reach them via `expr.DateTimeFunctions` etc.
var expr;
expr = expr || {};
expr.DateTimeFunctions = dt.ExprDateTimeFunctions;
expr.ListFunctions = lst.ExprListFunctions;
expr.StringFunctions = str.ExprStringFunctions;
expr.StructFunctions = struct.ExprStructFunctions;
module.exports = expr;
const polars_internal_1 = __importDefault(require("../../internals/polars_internal")); | ||
const utils_1 = require("../../utils"); | ||
const series_1 = require("../../series"); | ||
/** @ignore */ | ||
/**
 * Wrap a native polars expression handle (`_expr`) in the user-facing `Expr`
 * object. Every method delegates to the native binding; methods that accept
 * either positional arguments or an options object normalize to positional
 * form before delegating.
 * @param _expr - native `JsExpr` handle produced by the polars binding.
 */
const _Expr = (_expr) => {
    // Invoke `method` on the native handle and return the raw native result.
    const unwrap = (method, ...args) => {
        return _expr[method](...args);
    };
    // Invoke `method` on the native handle and re-wrap the result as `Expr`.
    const wrap = (method, ...args) => {
        return (0, exports._Expr)(unwrap(method, ...args));
    };
    // Build a binary operator method: the argument is coerced to an
    // expression first (or to a literal when `lit` is true).
    const wrapExprArg = (method, lit = false) => (other) => {
        const expr = (0, exports.exprToLitOrExpr)(other, lit).inner();
        return wrap(method, expr);
    };
    // Shared driver for the fixed-window rolling_* aggregations; accepts a
    // window-size number or an options object.
    const rolling = (method) => (opts, weights, minPeriods, center) => {
        const windowSize = opts?.["windowSize"] ?? (typeof opts === "number" ? opts : null);
        if (windowSize === null) {
            throw new Error("window size is required");
        }
        const callOpts = {
            // "<n>i" = fixed window of n rows (polars duration syntax).
            windowSize: `${windowSize}i`,
            weights: opts?.["weights"] ?? weights,
            minPeriods: opts?.["minPeriods"] ?? minPeriods ?? windowSize,
            center: opts?.["center"] ?? center ?? false,
        };
        return wrap(method, callOpts);
    };
    return {
        _expr,
        [Symbol.toStringTag]() {
            return "Expr";
        },
        [utils_1.INSPECT_SYMBOL]() {
            return _expr.toString();
        },
        serialize(format) {
            return _expr.serialize(format);
        },
        toString() {
            return _expr.toString();
        },
        toJSON(...args) {
            // this is passed by `JSON.stringify` when calling `toJSON()`
            if (args[0] === "") {
                return _expr.toJs();
            }
            return _expr.serialize("json").toString();
        },
        get str() {
            return str.ExprStringFunctions(_expr);
        },
        get lst() {
            return lst.ExprListFunctions(_expr);
        },
        get date() {
            return dt.ExprDateTimeFunctions(_expr);
        },
        get struct() {
            return struct.ExprStructFunctions(_expr);
        },
        abs() {
            return (0, exports._Expr)(_expr.abs());
        },
        aggGroups() {
            return (0, exports._Expr)(_expr.aggGroups());
        },
        alias(name) {
            return (0, exports._Expr)(_expr.alias(name));
        },
        inner() {
            return _expr;
        },
        and(other) {
            const expr = (0, exports.exprToLitOrExpr)(other, false).inner();
            return (0, exports._Expr)(_expr.and(expr));
        },
        argMax() {
            return (0, exports._Expr)(_expr.argMax());
        },
        argMin() {
            return (0, exports._Expr)(_expr.argMin());
        },
        argSort(reverse = false) {
            // Accept either a boolean or an options object `{ reverse }`.
            reverse = reverse?.reverse ?? reverse;
            return (0, exports._Expr)(_expr.argSort(reverse));
        },
        argUnique() {
            return (0, exports._Expr)(_expr.argUnique());
        },
        as(name) {
            return (0, exports._Expr)(_expr.alias(name));
        },
        backwardFill() {
            return (0, exports._Expr)(_expr.backwardFill());
        },
        cast(dtype, strict = false) {
            return (0, exports._Expr)(_expr.cast(dtype, strict));
        },
        ceil() {
            return (0, exports._Expr)(_expr.ceil());
        },
        clip(arg, max) {
            if (typeof arg === "number") {
                return (0, exports._Expr)(_expr.clip(arg, max));
            }
            else {
                return (0, exports._Expr)(_expr.clip(arg.min, arg.max));
            }
        },
        count() {
            return (0, exports._Expr)(_expr.count());
        },
        cumCount(reverse = false) {
            reverse = reverse?.reverse ?? reverse;
            // `reverse` is already normalized above; previously it was
            // redundantly unwrapped a second time in the call below.
            return (0, exports._Expr)(_expr.cumcount(reverse));
        },
        cumMax(reverse = false) {
            reverse = reverse?.reverse ?? reverse;
            return (0, exports._Expr)(_expr.cummax(reverse));
        },
        cumMin(reverse = false) {
            reverse = reverse?.reverse ?? reverse;
            return (0, exports._Expr)(_expr.cummin(reverse));
        },
        cumProd(reverse = false) {
            reverse = reverse?.reverse ?? reverse;
            return (0, exports._Expr)(_expr.cumprod(reverse));
        },
        cumSum(reverse = false) {
            reverse = reverse?.reverse ?? reverse;
            return (0, exports._Expr)(_expr.cumsum(reverse));
        },
        diff(n, nullBehavior = "ignore") {
            if (typeof n === "number") {
                return (0, exports._Expr)(_expr.diff(n, nullBehavior));
            }
            else {
                return (0, exports._Expr)(_expr.diff(n.n, n.nullBehavior));
            }
        },
        dot(other) {
            const expr = (0, exports.exprToLitOrExpr)(other, false).inner();
            return (0, exports._Expr)(_expr.dot(expr));
        },
        exclude(...columns) {
            return (0, exports._Expr)(_expr.exclude(columns.flat(2)));
        },
        explode() {
            return (0, exports._Expr)(_expr.explode());
        },
        extend(o, n) {
            if (n !== null && typeof n === "number") {
                return (0, exports._Expr)(_expr.extendConstant(o, n));
            }
            return (0, exports._Expr)(_expr.extendConstant(o.value, o.n));
        },
        extendConstant(o, n) {
            if (n !== null && typeof n === "number") {
                return (0, exports._Expr)(_expr.extendConstant(o, n));
            }
            return (0, exports._Expr)(_expr.extendConstant(o.value, o.n));
        },
        fillNan(other) {
            const expr = (0, exports.exprToLitOrExpr)(other, true).inner();
            return (0, exports._Expr)(_expr.fillNan(expr));
        },
        fillNull(fillValue) {
            // Strategy keywords dispatch to the native strategy fill; any
            // other value is treated as a literal/expression fill value.
            if (["backward", "forward", "mean", "min", "max", "zero", "one"].includes(fillValue)) {
                return (0, exports._Expr)(_expr.fillNullWithStrategy(fillValue));
            }
            const expr = (0, exports.exprToLitOrExpr)(fillValue).inner();
            return (0, exports._Expr)(_expr.fillNull(expr));
        },
        filter(predicate) {
            const expr = (0, exports.exprToLitOrExpr)(predicate).inner();
            return (0, exports._Expr)(_expr.filter(expr));
        },
        first() {
            return (0, exports._Expr)(_expr.first());
        },
        flatten() {
            return (0, exports._Expr)(_expr.explode());
        },
        floor() {
            return (0, exports._Expr)(_expr.floor());
        },
        forwardFill() {
            return (0, exports._Expr)(_expr.forwardFill());
        },
        hash(obj = 0, k1 = 1, k2 = 2, k3 = 3) {
            // The native binding expects u64 seeds, hence the BigInt coercion.
            if (typeof obj === "number" || typeof obj === "bigint") {
                return wrap("hash", BigInt(obj), BigInt(k1), BigInt(k2), BigInt(k3));
            }
            const o = { k0: obj, k1: k1, k2: k2, k3: k3, ...obj };
            return wrap("hash", BigInt(o.k0), BigInt(o.k1), BigInt(o.k2), BigInt(o.k3));
        },
        head(length) {
            if (typeof length === "number") {
                return wrap("head", length);
            }
            return wrap("head", length.length);
        },
        interpolate(method = "linear") {
            return (0, exports._Expr)(_expr.interpolate(method));
        },
        isDuplicated() {
            return (0, exports._Expr)(_expr.isDuplicated());
        },
        isFinite() {
            return (0, exports._Expr)(_expr.isFinite());
        },
        isInfinite() {
            return (0, exports._Expr)(_expr.isInfinite());
        },
        isFirst() {
            return (0, exports._Expr)(_expr.isFirst());
        },
        isNan() {
            return (0, exports._Expr)(_expr.isNan());
        },
        isNotNan() {
            return (0, exports._Expr)(_expr.isNotNan());
        },
        isNotNull() {
            return (0, exports._Expr)(_expr.isNotNull());
        },
        isNull() {
            return (0, exports._Expr)(_expr.isNull());
        },
        isUnique() {
            return (0, exports._Expr)(_expr.isUnique());
        },
        isIn(other) {
            // Arrays are lifted to a literal Series; everything else is
            // coerced to an expression.
            if (Array.isArray(other)) {
                other = polars_internal_1.default.lit((0, series_1.Series)(other).inner());
            }
            else {
                other = (0, exports.exprToLitOrExpr)(other, false).inner();
            }
            return wrap("isIn", other);
        },
        keepName() {
            return (0, exports._Expr)(_expr.keepName());
        },
        kurtosis(obj, bias = true) {
            const fisher = obj?.["fisher"] ?? (typeof obj === "boolean" ? obj : true);
            bias = obj?.["bias"] ?? bias;
            return (0, exports._Expr)(_expr.kurtosis(fisher, bias));
        },
        last() {
            return (0, exports._Expr)(_expr.last());
        },
        list() {
            return (0, exports._Expr)(_expr.list());
        },
        lowerBound() {
            return (0, exports._Expr)(_expr.lowerBound());
        },
        max() {
            return (0, exports._Expr)(_expr.max());
        },
        mean() {
            return (0, exports._Expr)(_expr.mean());
        },
        median() {
            return (0, exports._Expr)(_expr.median());
        },
        min() {
            return (0, exports._Expr)(_expr.min());
        },
        mode() {
            return (0, exports._Expr)(_expr.mode());
        },
        not() {
            return (0, exports._Expr)(_expr.not());
        },
        nUnique() {
            return (0, exports._Expr)(_expr.nUnique());
        },
        or(other) {
            const expr = (0, exports.exprToLitOrExpr)(other).inner();
            return (0, exports._Expr)(_expr.or(expr));
        },
        over(...exprs) {
            const partitionBy = (0, utils_1.selectionToExprList)(exprs, false);
            return wrap("over", partitionBy);
        },
        pow(exponent) {
            return (0, exports._Expr)(_expr.pow(exponent?.exponent ?? exponent));
        },
        prefix(prefix) {
            return (0, exports._Expr)(_expr.prefix(prefix));
        },
        quantile(quantile, interpolation = "nearest") {
            if (exports.Expr.isExpr(quantile)) {
                quantile = quantile._expr;
            }
            else {
                quantile = polars_internal_1.default.lit(quantile);
            }
            return (0, exports._Expr)(_expr.quantile(quantile, interpolation));
        },
        rank(method = "average", reverse = false) {
            return (0, exports._Expr)(_expr.rank(method?.method ?? method, method?.reverse ?? reverse));
        },
        reinterpret(signed = true) {
            signed = signed?.signed ?? signed;
            return (0, exports._Expr)(_expr.reinterpret(signed));
        },
        repeatBy(expr) {
            const e = (0, exports.exprToLitOrExpr)(expr, false)._expr;
            return (0, exports._Expr)(_expr.repeatBy(e));
        },
        reverse() {
            return (0, exports._Expr)(_expr.reverse());
        },
        rollingMax: rolling("rollingMax"),
        rollingMean: rolling("rollingMean"),
        rollingMin: rolling("rollingMin"),
        rollingSum: rolling("rollingSum"),
        rollingStd: rolling("rollingStd"),
        rollingVar: rolling("rollingVar"),
        rollingMedian: rolling("rollingMedian"),
        rollingQuantile(val, interpolation, windowSize, weights, minPeriods, center) {
            // Positional form: quantile first, window options trailing.
            if (typeof val === "number") {
                return wrap("rollingQuantile", val, interpolation ?? "nearest", {
                    windowSize: `${windowSize}i`,
                    weights,
                    minPeriods,
                    center,
                });
            }
            // Options-object form (`val` cannot be a number past the early
            // return, so the old dead `typeof val === "number"` fallback is gone).
            windowSize = val?.["windowSize"] ?? null;
            if (windowSize === null) {
                throw new Error("window size is required");
            }
            const options = {
                windowSize: `${windowSize}i`,
                weights: val?.["weights"] ?? weights,
                minPeriods: val?.["minPeriods"] ?? minPeriods ?? windowSize,
                center: val?.["center"] ?? center ?? false,
            };
            return wrap("rollingQuantile", val.quantile, val.interpolation ?? "nearest", options);
        },
        rollingSkew(val, bias = true) {
            if (typeof val === "number") {
                return wrap("rollingSkew", val, bias);
            }
            return wrap("rollingSkew", val.windowSize, val.bias ?? bias);
        },
        round(decimals) {
            return (0, exports._Expr)(_expr.round(decimals?.decimals ?? decimals));
        },
        sample(opts, frac, withReplacement = false, seed) {
            if (opts?.n !== undefined || opts?.frac !== undefined) {
                // FIX: forward `opts.seed` — previously the (always-undefined)
                // positional `seed` was forwarded here, silently dropping the
                // seed supplied in the options object.
                return this.sample(opts.n, opts.frac, opts.withReplacement, opts.seed ?? seed);
            }
            if (typeof opts === "number") {
                throw new Error("sample_n is not yet supported for expr");
            }
            if (typeof frac === "number") {
                return wrap("sampleFrac", frac, withReplacement, false, seed);
            }
            else {
                throw new TypeError("must specify either 'frac' or 'n'");
            }
        },
        shift(periods) {
            return (0, exports._Expr)(_expr.shift(periods));
        },
        shiftAndFill(optOrPeriods, fillValue) {
            if (typeof optOrPeriods === "number") {
                fillValue = (0, exports.exprToLitOrExpr)(fillValue).inner();
                return wrap("shiftAndFill", optOrPeriods, fillValue);
            }
            else {
                fillValue = (0, exports.exprToLitOrExpr)(optOrPeriods.fillValue).inner();
                const periods = optOrPeriods.periods;
                return wrap("shiftAndFill", periods, fillValue);
            }
        },
        skew(bias) {
            return wrap("skew", bias?.bias ?? bias ?? true);
        },
        slice(arg, len) {
            // offset/length travel to the native binding as literal expressions.
            if (typeof arg === "number") {
                return wrap("slice", polars_internal_1.default.lit(arg), polars_internal_1.default.lit(len));
            }
            return wrap("slice", polars_internal_1.default.lit(arg.offset), polars_internal_1.default.lit(arg.length));
        },
        sort(reverse = false, nullsLast = false) {
            if (typeof reverse === "boolean") {
                return wrap("sortWith", reverse, nullsLast);
            }
            return wrap("sortWith", reverse?.reverse ?? false, reverse?.nullsLast ?? nullsLast);
        },
        sortBy(arg, reverse = false) {
            if (arg?.by !== undefined) {
                return this.sortBy(arg.by, arg.reverse);
            }
            reverse = Array.isArray(reverse) ? reverse.flat() : [reverse];
            const by = (0, utils_1.selectionToExprList)(arg, false);
            return wrap("sortBy", by, reverse);
        },
        std() {
            return (0, exports._Expr)(_expr.std());
        },
        suffix(suffix) {
            return (0, exports._Expr)(_expr.suffix(suffix));
        },
        sum() {
            return (0, exports._Expr)(_expr.sum());
        },
        tail(length) {
            return (0, exports._Expr)(_expr.tail(length));
        },
        take(indices) {
            if (Array.isArray(indices)) {
                indices = polars_internal_1.default.lit((0, series_1.Series)(indices).inner());
            }
            else {
                indices = indices.inner();
            }
            return wrap("take", indices);
        },
        takeEvery(n) {
            return (0, exports._Expr)(_expr.takeEvery(n));
        },
        unique(opt) {
            // FIX: unwrap `{ maintainOrder }` before testing — previously any
            // truthy object (even `{ maintainOrder: false }`) selected the
            // order-stable path.
            const maintainOrder = opt?.maintainOrder ?? opt;
            return maintainOrder ? wrap("unique_stable") : wrap("unique");
        },
        upperBound() {
            return (0, exports._Expr)(_expr.upperBound());
        },
        where(expr) {
            return this.filter(expr);
        },
        var() {
            return (0, exports._Expr)(_expr.var());
        },
        // Arithmetic operators and their verbose aliases.
        add: wrapExprArg("add"),
        sub: wrapExprArg("sub"),
        div: wrapExprArg("div"),
        mul: wrapExprArg("mul"),
        rem: wrapExprArg("rem"),
        plus: wrapExprArg("add"),
        minus: wrapExprArg("sub"),
        divideBy: wrapExprArg("div"),
        multiplyBy: wrapExprArg("mul"),
        modulo: wrapExprArg("rem"),
        // Comparison operators and their verbose aliases.
        eq: wrapExprArg("eq"),
        equals: wrapExprArg("eq"),
        gtEq: wrapExprArg("gtEq"),
        greaterThanEquals: wrapExprArg("gtEq"),
        gt: wrapExprArg("gt"),
        greaterThan: wrapExprArg("gt"),
        ltEq: wrapExprArg("ltEq"),
        lessThanEquals: wrapExprArg("ltEq"),
        lt: wrapExprArg("lt"),
        lessThan: wrapExprArg("lt"),
        neq: wrapExprArg("neq"),
        notEquals: wrapExprArg("neq"),
    };
};
exports._Expr = _Expr;
// Duck-type check for `Expr`: an Expr advertises itself via a callable
// `Symbol.toStringTag` method returning "Expr". Any throw while probing
// (e.g. a hostile getter) is treated as "not an Expr".
const isExpr = (anyVal) => {
    try {
        const tag = anyVal?.[Symbol.toStringTag]?.();
        return tag === "Expr";
    }
    catch (_err) {
        return false;
    }
};
const deserialize = (buf, format) => { | ||
return (0, exports._Expr)(polars_internal_1.default.JsExpr.deserialize(buf, format)); | ||
}; | ||
exports.Expr = Object.assign(exports._Expr, { | ||
isExpr, | ||
deserialize, | ||
}); | ||
/**
 * @ignore
 * Coerce an arbitrary value to an `Expr`:
 * - a string becomes a column reference when `stringToLit` is false,
 *   a literal otherwise;
 * - an existing `Expr` passes through untouched;
 * - a `Series` is lifted to a literal over its native handle;
 * - anything else becomes a plain literal.
 */
const exprToLitOrExpr = (expr, stringToLit = true) => {
    if (typeof expr === "string" && !stringToLit) {
        return (0, exports._Expr)(polars_internal_1.default.col(expr));
    }
    if (exports.Expr.isExpr(expr)) {
        return expr;
    }
    if (series_1.Series.isSeries(expr)) {
        return (0, exports._Expr)(polars_internal_1.default.lit(expr._s));
    }
    return (0, exports._Expr)(polars_internal_1.default.lit(expr));
};
exports.exprToLitOrExpr = exprToLitOrExpr;
@@ -6,3 +6,4 @@ import { Expr } from "../expr"; | ||
*/ | ||
export declare type ExprList = ListFunctions<Expr>; | ||
export interface ExprList extends ListFunctions<Expr> { | ||
} | ||
export declare const ExprListFunctions: (_expr: any) => ExprList; |
@@ -8,3 +8,3 @@ "use strict"; | ||
const expr_1 = require("../expr"); | ||
const series_1 = require("../../series/series"); | ||
const series_1 = require("../../series"); | ||
const polars_internal_1 = __importDefault(require("../../internals/polars_internal")); | ||
@@ -60,4 +60,9 @@ const functions_1 = require("../functions"); | ||
}, | ||
eval(expr, parallel) { | ||
return wrap("lstEval", expr, parallel); | ||
eval(expr, parallel = true) { | ||
if (expr_1.Expr.isExpr(expr)) { | ||
return wrap("lstEval", expr._expr, parallel); | ||
} | ||
else { | ||
return wrap("lstEval", expr, parallel); | ||
} | ||
}, | ||
@@ -64,0 +69,0 @@ first() { |
@@ -0,1 +1,2 @@ | ||
import { StringFunctions } from "../../shared_traits"; | ||
import { DataType } from "../../datatypes"; | ||
@@ -6,3 +7,3 @@ import { Expr } from "../expr"; | ||
*/ | ||
export interface ExprString { | ||
export interface StringNamespace extends StringFunctions<Expr> { | ||
/** | ||
@@ -157,2 +158,100 @@ * Vertically concat the values in the Series to a single string value. | ||
/** | ||
* Add a leading fillChar to a string until string length is reached. | ||
* If string is longer or equal to given length no modifications will be done | ||
* @param {number} length - of the final string | ||
* @param {string} fillChar - that will fill the string. | ||
* @note If a string longer than 1 character is provided only the first character will be used | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... 'foo': [ | ||
* ... "a", | ||
* ... "b", | ||
* ... "LONG_WORD", | ||
* ... "cow" | ||
* ... ]}) | ||
* > df.select(pl.col('foo').str.padStart("_", 3) | ||
* shape: (4, 1) | ||
* ┌──────────┐ | ||
* │ a │ | ||
* │ -------- │ | ||
* │ str │ | ||
* ╞══════════╡ | ||
* │ __a │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ __b │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ LONG_WORD│ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ cow │ | ||
* └──────────┘ | ||
* ``` | ||
*/ | ||
padStart(length: number, fillChar: string): Expr; | ||
/** | ||
* Add leading "0" to a string until string length is reached. | ||
* If string is longer or equal to given length no modifications will be done | ||
* @param {number} length - of the final string | ||
* @see {@link padStart} | ||
* * @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... 'foo': [ | ||
* ... "a", | ||
* ... "b", | ||
* ... "LONG_WORD", | ||
* ... "cow" | ||
* ... ]}) | ||
* > df.select(pl.col('foo').str.justify(3) | ||
* shape: (4, 1) | ||
* ┌──────────┐ | ||
* │ a │ | ||
* │ -------- │ | ||
* │ str │ | ||
* ╞══════════╡ | ||
* │ 00a │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 00b │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ LONG_WORD│ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ cow │ | ||
* └──────────┘ | ||
* ``` | ||
*/ | ||
zFill(length: number): Expr; | ||
/** | ||
* Add a trailing fillChar to a string until string length is reached. | ||
* If string is longer or equal to given length no modifications will be done | ||
* @param {number} length - of the final string | ||
* @param {string} fillChar - that will fill the string. | ||
* @note If a string longer than 1 character is provided only the first character will be used | ||
* * @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... 'foo': [ | ||
* ... "a", | ||
* ... "b", | ||
* ... "LONG_WORD", | ||
* ... "cow" | ||
* ... ]}) | ||
* > df.select(pl.col('foo').str.padEnd("_", 3) | ||
* shape: (4, 1) | ||
* ┌──────────┐ | ||
* │ a │ | ||
* │ -------- │ | ||
* │ str │ | ||
* ╞══════════╡ | ||
* │ a__ │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ b__ │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ LONG_WORD│ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ cow │ | ||
* └──────────┘ | ||
* ``` | ||
*/ | ||
padEnd(length: number, fillChar: string): Expr; | ||
/** | ||
* Create subslices of the string values of a Utf8 Series. | ||
@@ -181,2 +280,2 @@ * @param start - Start of the slice (negative indexing may be used). | ||
} | ||
export declare const ExprStringFunctions: (_expr: any) => ExprString; | ||
export declare const ExprStringFunctions: (_expr: any) => StringNamespace; |
@@ -14,5 +14,5 @@ "use strict"; | ||
case "hex": | ||
return wrap(`strHexDecode`, strict); | ||
return wrap("strHexDecode", strict); | ||
case "base64": | ||
return wrap(`strBase64Decode`, strict); | ||
return wrap("strBase64Decode", strict); | ||
default: | ||
@@ -38,5 +38,5 @@ throw new RangeError("supported encodings are 'hex' and 'base64'"); | ||
case "hex": | ||
return wrap(`strHexEncode`); | ||
return wrap("strHexEncode"); | ||
case "base64": | ||
return wrap(`strBase64Encode`); | ||
return wrap("strBase64Encode"); | ||
default: | ||
@@ -67,2 +67,11 @@ throw new RangeError("supported encodings are 'hex' and 'base64'"); | ||
}, | ||
padStart(length, fillChar) { | ||
return wrap("strPadStart", length, fillChar); | ||
}, | ||
zFill(length) { | ||
return wrap("strZFill", length); | ||
}, | ||
padEnd(length, fillChar) { | ||
return wrap("strPadEnd", length, fillChar); | ||
}, | ||
slice(start, length) { | ||
@@ -69,0 +78,0 @@ return wrap("strSlice", start, length); |
import { Expr } from "../expr"; | ||
/**
 * Struct functions
 *
 * Expression methods operating on Struct-typed columns; reached via the
 * `struct` getter on an `Expr`.
 */
export interface ExprStruct { | ||
/**
 * Access a field by name
 * @param name - name of the field
 * @returns expression selecting that field's values from each struct
 */
field(name: string): Expr; | ||
/**
 * Rename the fields of a struct
 * @param names - new names of the fields, in field order
 */
renameFields(names: string[]): Expr; | ||
} | ||
/** Factory wrapping a native expression handle in the struct namespace. */
export declare const ExprStructFunctions: (_expr: any) => ExprStruct; | ||
@@ -12,5 +12,5 @@ "use strict"; | ||
return (0, expr_1._Expr)(_expr.structRenameFields(names)); | ||
} | ||
}, | ||
}; | ||
}; | ||
exports.ExprStructFunctions = ExprStructFunctions; |
import { Expr } from "./expr"; | ||
import { Series } from "../series/series"; | ||
import { Series } from "../series"; | ||
import { DataFrame } from "../dataframe"; | ||
@@ -14,7 +14,7 @@ import { ExprOrString } from "../utils"; | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "ham": [1, 2, 3], | ||
* >>> "hamburger": [11, 22, 33], | ||
* >>> "foo": [3, 2, 1]}) | ||
* >>> df.select(col("foo")) | ||
* > df = pl.DataFrame({ | ||
* > "ham": [1, 2, 3], | ||
* > "hamburger": [11, 22, 33], | ||
* > "foo": [3, 2, 1]}) | ||
* > df.select(col("foo")) | ||
* shape: (3, 1) | ||
@@ -32,3 +32,3 @@ * ╭─────╮ | ||
* ╰─────╯ | ||
* >>> df.select(col("*")) | ||
* > df.select(col("*")) | ||
* shape: (3, 3) | ||
@@ -46,3 +46,3 @@ * ╭─────┬───────────┬─────╮ | ||
* ╰─────┴───────────┴─────╯ | ||
* >>> df.select(col("^ham.*$")) | ||
* > df.select(col("^ham.*$")) | ||
* shape: (3, 2) | ||
@@ -60,3 +60,3 @@ * ╭─────┬───────────╮ | ||
* ╰─────┴───────────╯ | ||
* >>> df.select(col("*").exclude("ham")) | ||
* > df.select(col("*").exclude("ham")) | ||
* shape: (3, 2) | ||
@@ -74,3 +74,3 @@ * ╭───────────┬─────╮ | ||
* ╰───────────┴─────╯ | ||
* >>> df.select(col(["hamburger", "foo"]) | ||
* > df.select(col(["hamburger", "foo"]) | ||
* shape: (3, 2) | ||
@@ -88,3 +88,3 @@ * ╭───────────┬─────╮ | ||
* ╰───────────┴─────╯ | ||
* >>> df.select(col(pl.Series(["hamburger", "foo"])) | ||
* > df.select(col(pl.Series(["hamburger", "foo"])) | ||
* shape: (3, 2) | ||
@@ -120,5 +120,5 @@ * ╭───────────┬─────╮ | ||
* ``` | ||
* >>> df.lazy() | ||
* >>> .filter(pl.col("foo").lt(pl.arange(0, 100))) | ||
* >>> .collect() | ||
* > df.lazy() | ||
* > .filter(pl.col("foo").lt(pl.arange(0, 100))) | ||
* > .collect() | ||
* ``` | ||
@@ -170,3 +170,3 @@ */ | ||
* ``` | ||
* >>> pl.col("*").exclude(columns) | ||
* > pl.col("*").exclude(columns) | ||
* ``` | ||
@@ -177,2 +177,3 @@ */ | ||
/** Get the first value. */ | ||
export declare function first(): Expr; | ||
export declare function first(column: string): Expr; | ||
@@ -185,7 +186,7 @@ export declare function first<T>(column: Series): T; | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* > df = pl.DataFrame({ | ||
* ... "a": ["a", "b", "c"], | ||
* ... "b": [1, 2, 3], | ||
* ... }) | ||
* >>> df.select( | ||
* > df.select( | ||
* ... pl.format("foo_{}_bar_{}", pl.col("a"), "b").alias("fmt"), | ||
@@ -207,4 +208,4 @@ * ... ) | ||
* // You can use format as tag function as well | ||
* >>> pl.format("foo_{}_bar_{}", pl.col("a"), "b") // is the same as | ||
* >>> pl.format`foo_${pl.col("a")}_bar_${"b"}` | ||
* > pl.format("foo_{}_bar_{}", pl.col("a"), "b") // is the same as | ||
* > pl.format`foo_${pl.col("a")}_bar_${"b"}` | ||
* ``` | ||
@@ -259,3 +260,3 @@ */ | ||
``` | ||
>>> pl.DataFrame( | ||
>pl.DataFrame( | ||
... { | ||
@@ -280,3 +281,3 @@ ... "int": [1, 2], | ||
// Only collect specific columns as a struct: | ||
>>> df = pl.DataFrame({ | ||
>df = pl.DataFrame({ | ||
... "a": [1, 2, 3, 4], | ||
@@ -286,3 +287,3 @@ ... "b": ["one", "two", "three", "four"], | ||
... }) | ||
>>> df.withColumn(pl.struct(pl.col(["a", "b"])).alias("a_and_b")) | ||
>df.withColumn(pl.struct(pl.col(["a", "b"])).alias("a_and_b")) | ||
shape: (4, 4) | ||
@@ -313,4 +314,4 @@ ┌─────┬───────┬─────┬───────────────────────────────┐ | ||
* | ||
* >>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) | ||
* >>> df.withColumn( | ||
* >df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) | ||
* >df.withColumn( | ||
* ... pl.concatList(["a", "b"]).arr.eval(pl.element().rank()).alias("rank") | ||
@@ -333,5 +334,5 @@ * ... ) | ||
* | ||
* >>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) | ||
* >>> df.withColumn( | ||
* ... pl.concatList(["a", "b"]).arr.eval(pl.element() * 2).alias("a_b_doubled") | ||
* >df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) | ||
* >df.withColumn( | ||
* ... pl.concatList(["a", "b"]).arr.eval(pl.element().multiplyBy(2)).alias("a_b_doubled") | ||
* ... ) | ||
@@ -338,0 +339,0 @@ * shape: (3, 3) |
@@ -8,3 +8,3 @@ "use strict"; | ||
const expr_1 = require("./expr"); | ||
const series_1 = require("../series/series"); | ||
const series_1 = require("../series"); | ||
const dataframe_1 = require("../dataframe"); | ||
@@ -22,7 +22,7 @@ const utils_1 = require("../utils"); | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "ham": [1, 2, 3], | ||
* >>> "hamburger": [11, 22, 33], | ||
* >>> "foo": [3, 2, 1]}) | ||
* >>> df.select(col("foo")) | ||
* > df = pl.DataFrame({ | ||
* > "ham": [1, 2, 3], | ||
* > "hamburger": [11, 22, 33], | ||
* > "foo": [3, 2, 1]}) | ||
* > df.select(col("foo")) | ||
* shape: (3, 1) | ||
@@ -40,3 +40,3 @@ * ╭─────╮ | ||
* ╰─────╯ | ||
* >>> df.select(col("*")) | ||
* > df.select(col("*")) | ||
* shape: (3, 3) | ||
@@ -54,3 +54,3 @@ * ╭─────┬───────────┬─────╮ | ||
* ╰─────┴───────────┴─────╯ | ||
* >>> df.select(col("^ham.*$")) | ||
* > df.select(col("^ham.*$")) | ||
* shape: (3, 2) | ||
@@ -68,3 +68,3 @@ * ╭─────┬───────────╮ | ||
* ╰─────┴───────────╯ | ||
* >>> df.select(col("*").exclude("ham")) | ||
* > df.select(col("*").exclude("ham")) | ||
* shape: (3, 2) | ||
@@ -82,3 +82,3 @@ * ╭───────────┬─────╮ | ||
* ╰───────────┴─────╯ | ||
* >>> df.select(col(["hamburger", "foo"]) | ||
* > df.select(col(["hamburger", "foo"]) | ||
* shape: (3, 2) | ||
@@ -96,3 +96,3 @@ * ╭───────────┬─────╮ | ||
* ╰───────────┴─────╯ | ||
* >>> df.select(col(pl.Series(["hamburger", "foo"])) | ||
* > df.select(col(pl.Series(["hamburger", "foo"])) | ||
* shape: (3, 2) | ||
@@ -146,4 +146,6 @@ * ╭───────────┬─────╮ | ||
if (eager) { | ||
const df = (0, dataframe_1.DataFrame)({ "a": [1] }); | ||
return df.select(arange(low, high, step).alias("arange")).getColumn("arange"); | ||
const df = (0, dataframe_1.DataFrame)({ a: [1] }); | ||
return df | ||
.select(arange(low, high, step).alias("arange")) | ||
.getColumn("arange"); | ||
} | ||
@@ -212,2 +214,5 @@ return (0, expr_1._Expr)(polars_internal_1.default.arange(low, high, step)); | ||
function first(column) { | ||
if (!column) { | ||
return (0, expr_1._Expr)(polars_internal_1.default.first()); | ||
} | ||
if (series_1.Series.isSeries(column)) { | ||
@@ -231,7 +236,7 @@ if (column.length) { | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* > df = pl.DataFrame({ | ||
* ... "a": ["a", "b", "c"], | ||
* ... "b": [1, 2, 3], | ||
* ... }) | ||
* >>> df.select( | ||
* > df.select( | ||
* ... pl.format("foo_{}_bar_{}", pl.col("a"), "b").alias("fmt"), | ||
@@ -253,4 +258,4 @@ * ... ) | ||
* // You can use format as tag function as well | ||
* >>> pl.format("foo_{}_bar_{}", pl.col("a"), "b") // is the same as | ||
* >>> pl.format`foo_${pl.col("a")}_bar_${"b"}` | ||
* > pl.format("foo_{}_bar_{}", pl.col("a"), "b") // is the same as | ||
* > pl.format`foo_${pl.col("a")}_bar_${"b"}` | ||
* ``` | ||
@@ -373,3 +378,3 @@ */ | ||
if (series_1.Series.isSeries(exprs[0])) { | ||
return select((0, expr_1._Expr)(polars_internal_1.default.asStruct(exprs.map(e => polars_internal_1.default.lit(e.inner()))))).toSeries(); | ||
return select((0, expr_1._Expr)(polars_internal_1.default.asStruct(exprs.map((e) => polars_internal_1.default.lit(e.inner()))))).toSeries(); | ||
} | ||
@@ -387,4 +392,4 @@ exprs = (0, utils_1.selectionToExprList)(exprs); | ||
* | ||
* >>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) | ||
* >>> df.withColumn( | ||
* >df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) | ||
* >df.withColumn( | ||
* ... pl.concatList(["a", "b"]).arr.eval(pl.element().rank()).alias("rank") | ||
@@ -407,5 +412,5 @@ * ... ) | ||
* | ||
* >>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) | ||
* >>> df.withColumn( | ||
* ... pl.concatList(["a", "b"]).arr.eval(pl.element() * 2).alias("a_b_doubled") | ||
* >df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) | ||
* >df.withColumn( | ||
* ... pl.concatList(["a", "b"]).arr.eval(pl.element().multiplyBy(2)).alias("a_b_doubled") | ||
* ... ) | ||
@@ -412,0 +417,0 @@ * shape: (3, 3) |
import { Expr } from "./expr"; | ||
import { LazyDataFrame } from "./dataframe"; | ||
/** @ignore */ | ||
export declare const _LazyGroupBy: (_lgb: any) => LazyGroupBy; | ||
/** | ||
* LazyGroupBy | ||
* @category lazy | ||
*/ | ||
export interface LazyGroupBy { | ||
/** | ||
* Aggregate the groupby. | ||
*/ | ||
agg(...aggs: Expr[]): LazyDataFrame; | ||
/** | ||
* Return the first n rows of the groupby. | ||
* @param n number of rows to return | ||
*/ | ||
head(n?: number): LazyDataFrame; | ||
/** | ||
* Return the last n rows of the groupby. | ||
* @param n number of rows to return | ||
*/ | ||
tail(n?: number): LazyDataFrame; | ||
} | ||
export declare const LazyGroupBy: (_lgb: any) => LazyGroupBy; |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.LazyGroupBy = void 0; | ||
exports._LazyGroupBy = void 0; | ||
const utils_1 = require("../utils"); | ||
const dataframe_1 = require("./dataframe"); | ||
const LazyGroupBy = (_lgb) => { | ||
/** @ignore */ | ||
const _LazyGroupBy = (_lgb) => { | ||
return { | ||
@@ -18,5 +19,5 @@ agg(...aggs) { | ||
return (0, dataframe_1._LazyDataFrame)(_lgb.tail(n)); | ||
} | ||
}, | ||
}; | ||
}; | ||
exports.LazyGroupBy = LazyGroupBy; | ||
exports._LazyGroupBy = _LazyGroupBy; |
@@ -1,11 +0,4 @@ | ||
import * as func from "./functions"; | ||
import * as gb from "./groupby"; | ||
import * as expr from "./expr"; | ||
import * as whenthen from "./whenthen"; | ||
declare namespace lazy { | ||
export import GroupBy = gb.LazyGroupBy; | ||
export import Expr = expr; | ||
export import funcs = func; | ||
export import when = whenthen; | ||
} | ||
export = lazy; | ||
export * from "./functions"; | ||
export * from "./groupby"; | ||
export * from "./expr"; | ||
export * from "./whenthen"; |
"use strict"; | ||
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { | ||
if (k2 === undefined) k2 = k; | ||
Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); | ||
var desc = Object.getOwnPropertyDescriptor(m, k); | ||
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { | ||
desc = { enumerable: true, get: function() { return m[k]; } }; | ||
} | ||
Object.defineProperty(o, k2, desc); | ||
}) : (function(o, m, k, k2) { | ||
@@ -9,25 +13,9 @@ if (k2 === undefined) k2 = k; | ||
})); | ||
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { | ||
Object.defineProperty(o, "default", { enumerable: true, value: v }); | ||
}) : function(o, v) { | ||
o["default"] = v; | ||
}); | ||
var __importStar = (this && this.__importStar) || function (mod) { | ||
if (mod && mod.__esModule) return mod; | ||
var result = {}; | ||
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); | ||
__setModuleDefault(result, mod); | ||
return result; | ||
var __exportStar = (this && this.__exportStar) || function(m, exports) { | ||
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); | ||
}; | ||
const func = __importStar(require("./functions")); | ||
const gb = __importStar(require("./groupby")); | ||
const expr = __importStar(require("./expr")); | ||
const whenthen = __importStar(require("./whenthen")); | ||
var lazy; | ||
(function (lazy) { | ||
lazy.GroupBy = gb.LazyGroupBy; | ||
lazy.Expr = expr; | ||
lazy.funcs = func; | ||
lazy.when = whenthen; | ||
})(lazy || (lazy = {})); | ||
module.exports = lazy; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
__exportStar(require("./functions"), exports); | ||
__exportStar(require("./groupby"), exports); | ||
__exportStar(require("./expr"), exports); | ||
__exportStar(require("./whenthen"), exports); |
@@ -26,4 +26,4 @@ import { Expr } from "./expr"; | ||
* // Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't. | ||
* >>> df = pl.DataFrame({"foo": [1, 3, 4], "bar": [3, 4, 0]}) | ||
* >>> df.withColumn(pl.when(pl.col("foo").gt(2)).then(pl.lit(1)).otherwise(pl.lit(-1))) | ||
* > df = pl.DataFrame({"foo": [1, 3, 4], "bar": [3, 4, 0]}) | ||
* > df.withColumn(pl.when(pl.col("foo").gt(2)).then(pl.lit(1)).otherwise(pl.lit(-1))) | ||
* shape: (3, 3) | ||
@@ -43,3 +43,3 @@ * ┌─────┬─────┬─────────┐ | ||
* // Or with multiple `when, thens` chained: | ||
* >>> df.with_column( | ||
* > df.with_column( | ||
* ... pl.when(pl.col("foo").gt(2)) | ||
@@ -46,0 +46,0 @@ * ... .then(1) |
@@ -13,3 +13,3 @@ "use strict"; | ||
then: ({ _expr }) => WhenThenThen(_whenthenthen.then(_expr)), | ||
otherwise: ({ _expr }) => expr_1.Expr(_whenthenthen.otherwise(_expr)) | ||
otherwise: ({ _expr }) => expr_1.Expr(_whenthenthen.otherwise(_expr)), | ||
}; | ||
@@ -20,3 +20,3 @@ } | ||
when: ({ _expr }) => WhenThenThen(_whenthen.when(_expr)), | ||
otherwise: ({ _expr }) => expr_1.Expr(_whenthen.otherwise(_expr)) | ||
otherwise: ({ _expr }) => expr_1.Expr(_whenthen.otherwise(_expr)), | ||
}; | ||
@@ -30,3 +30,3 @@ } | ||
return { | ||
then: ({ _expr }) => WhenThen(_when.then(_expr)) | ||
then: ({ _expr }) => WhenThen(_when.then(_expr)), | ||
}; | ||
@@ -40,4 +40,4 @@ } | ||
* // Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't. | ||
* >>> df = pl.DataFrame({"foo": [1, 3, 4], "bar": [3, 4, 0]}) | ||
* >>> df.withColumn(pl.when(pl.col("foo").gt(2)).then(pl.lit(1)).otherwise(pl.lit(-1))) | ||
* > df = pl.DataFrame({"foo": [1, 3, 4], "bar": [3, 4, 0]}) | ||
* > df.withColumn(pl.when(pl.col("foo").gt(2)).then(pl.lit(1)).otherwise(pl.lit(-1))) | ||
* shape: (3, 3) | ||
@@ -57,3 +57,3 @@ * ┌─────┬─────┬─────────┐ | ||
* // Or with multiple `when, thens` chained: | ||
* >>> df.with_column( | ||
* > df.with_column( | ||
* ... pl.when(pl.col("foo").gt(2)) | ||
@@ -60,0 +60,0 @@ * ... .then(1) |
@@ -1,4 +0,4 @@ | ||
import { Series } from "./series"; | ||
import { Series } from "."; | ||
import { DateFunctions } from "../shared_traits"; | ||
export declare type SeriesDateFunctions = DateFunctions<Series>; | ||
export type SeriesDateFunctions = DateFunctions<Series>; | ||
export declare const SeriesDateFunctions: (_s: any) => SeriesDateFunctions; |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.SeriesDateFunctions = void 0; | ||
const series_1 = require("./series"); | ||
const _1 = require("."); | ||
const SeriesDateFunctions = (_s) => { | ||
const wrap = (method, ...args) => { | ||
return (0, series_1._Series)(_s[method](...args)); | ||
return (0, _1._Series)(_s[method](...args)); | ||
}; | ||
@@ -9,0 +9,0 @@ const wrapNullArgs = (method) => () => wrap(method); |
@@ -1,4 +0,5 @@ | ||
import { Series } from "./series"; | ||
import { Series } from "."; | ||
import { ListFunctions } from "../shared_traits"; | ||
export declare type SeriesListFunctions = ListFunctions<Series>; | ||
export interface ListNamespace extends ListFunctions<Series> { | ||
} | ||
export declare const SeriesListFunctions: (_s: any) => ListFunctions<Series>; |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.SeriesListFunctions = void 0; | ||
const series_1 = require("./series"); | ||
const _1 = require("."); | ||
const functions_1 = require("../lazy/functions"); | ||
const SeriesListFunctions = (_s) => { | ||
const wrap = (method, ...args) => { | ||
const s = (0, series_1._Series)(_s); | ||
const s = (0, _1._Series)(_s); | ||
return s | ||
@@ -10,0 +10,0 @@ .toFrame() |
import { DataType } from "../datatypes"; | ||
import { Series } from "./series"; | ||
import { Series } from "."; | ||
import { StringFunctions } from "../shared_traits"; | ||
/** | ||
* namespace containing series string functions | ||
*/ | ||
export interface StringFunctions { | ||
export interface StringNamespace extends StringFunctions<Series> { | ||
/** | ||
@@ -11,3 +12,3 @@ * Vertically concat the values in the Series to a single string value. | ||
* ``` | ||
* >>> pl.Series([1, null, 2]).str.concat("-")[0] | ||
* > pl.Series([1, null, 2]).str.concat("-")[0] | ||
* '1-null-2' | ||
@@ -74,3 +75,3 @@ * ``` | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* > df = pl.DataFrame({ | ||
* ... 'a': [ | ||
@@ -81,3 +82,3 @@ * ... 'http://vote.com/ballon_dor?candidate=messi&ref=polars', | ||
* ... ]}) | ||
* > df.getColumn("a").str.extract(/candidate=(\w+)/, 1) | ||
* > df.getColumn("a").str.extract(/candidate=(\w+)/, 1) | ||
* shape: (3, 1) | ||
@@ -107,3 +108,3 @@ * ┌─────────┐ | ||
* ``` | ||
* >>> s = pl.Series('json_val', [ | ||
* > s = pl.Series('json_val', [ | ||
* ... '{"a":"1"}', | ||
@@ -115,3 +116,3 @@ * ... null, | ||
* ... ]) | ||
* >>> s.str.jsonPathMatch('$.a') | ||
* > s.str.jsonPathMatch('$.a') | ||
* shape: (5,) | ||
@@ -134,2 +135,68 @@ * Series: 'json_val' [str] | ||
/** | ||
* Add a leading fillChar to a string until string length is reached. | ||
* If string is longer or equal to given length no modifications will be done | ||
* @param {number} length - of the final string | ||
* @param {string} fillChar - that will fill the string. | ||
* @note If a string longer than 1 character is provided only the first character will be used | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... 'foo': [ | ||
* ... "a", | ||
* ... "b", | ||
* ... "LONG_WORD", | ||
* ... "cow" | ||
* ... ]}) | ||
* > df.select(pl.col('foo').str.padStart("_", 3) | ||
* shape: (4, 1) | ||
* ┌──────────┐ | ||
* │ a │ | ||
* │ -------- │ | ||
* │ str │ | ||
* ╞══════════╡ | ||
* │ __a │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ __b │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ LONG_WORD│ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ cow │ | ||
* └──────────┘ | ||
* ``` | ||
*/ | ||
padStart(length: number, fillChar: string): Series; | ||
/** | ||
* Add a leading '0' to a string until string length is reached. | ||
* If string is longer or equal to given length no modifications will be done | ||
* @param {number} length - of the final string | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... 'foo': [ | ||
* ... "a", | ||
* ... "b", | ||
* ... "LONG_WORD", | ||
* ... "cow" | ||
* ... ]}) | ||
* > df.select(pl.col('foo').str.padStart(3) | ||
* shape: (4, 1) | ||
* ┌──────────┐ | ||
* │ a │ | ||
* │ -------- │ | ||
* │ str │ | ||
* ╞══════════╡ | ||
* │ 00a │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 00b │ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ LONG_WORD│ | ||
* ├╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ cow │ | ||
* └──────────┘ | ||
* ``` | ||
*/ | ||
zFill(length: number): Series; | ||
/** Add trailing zeros */ | ||
padEnd(length: number, fillChar: string): Series; | ||
/** | ||
* Replace first regex match with a string value. | ||
@@ -177,2 +244,2 @@ * @param pattern A valid regex pattern | ||
} | ||
export declare const StringFunctions: (_s: any) => StringFunctions; | ||
export declare const SeriesStringFunctions: (_s: any) => StringNamespace; |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.StringFunctions = void 0; | ||
const series_1 = require("./series"); | ||
exports.SeriesStringFunctions = void 0; | ||
const _1 = require("."); | ||
const utils_1 = require("../utils"); | ||
const functions_1 = require("../lazy/functions"); | ||
const StringFunctions = (_s) => { | ||
const SeriesStringFunctions = (_s) => { | ||
const wrap = (method, ...args) => { | ||
const ret = _s[method](...args); | ||
return (0, series_1._Series)(ret); | ||
return (0, _1._Series)(ret); | ||
}; | ||
@@ -24,8 +24,5 @@ const handleDecode = (encoding, strict) => { | ||
concat(delimiter) { | ||
return (0, series_1._Series)(_s) | ||
return (0, _1._Series)(_s) | ||
.toFrame() | ||
.select((0, functions_1.col)(_s.name) | ||
.str | ||
.concat(delimiter) | ||
.as(_s.name)) | ||
.select((0, functions_1.col)(_s.name).str.concat(delimiter).as(_s.name)) | ||
.getColumn(_s.name); | ||
@@ -45,5 +42,5 @@ }, | ||
case "hex": | ||
return wrap(`strHexEncode`); | ||
return wrap("strHexEncode"); | ||
case "base64": | ||
return wrap(`strBase64Encode`); | ||
return wrap("strBase64Encode"); | ||
default: | ||
@@ -65,2 +62,11 @@ throw new RangeError("supported encodings are 'hex' and 'base64'"); | ||
}, | ||
padStart(length, fillChar) { | ||
return wrap("strPadStart", length, fillChar); | ||
}, | ||
zFill(length) { | ||
return wrap("strZFill", length); | ||
}, | ||
padEnd(length, fillChar) { | ||
return wrap("strPadEnd", length, fillChar); | ||
}, | ||
replace(pat, val) { | ||
@@ -80,29 +86,20 @@ return wrap("strReplace", (0, utils_1.regexToString)(pat), val); | ||
const inclusive = typeof options === "boolean" ? options : options?.inclusive; | ||
const s = (0, series_1._Series)(_s); | ||
const s = (0, _1._Series)(_s); | ||
return s | ||
.toFrame() | ||
.select((0, functions_1.col)(s.name) | ||
.str | ||
.split(by, inclusive) | ||
.as(s.name)) | ||
.select((0, functions_1.col)(s.name).str.split(by, inclusive).as(s.name)) | ||
.getColumn(s.name); | ||
}, | ||
strip() { | ||
const s = (0, series_1._Series)(_s); | ||
const s = (0, _1._Series)(_s); | ||
return s | ||
.toFrame() | ||
.select((0, functions_1.col)(s.name) | ||
.str | ||
.strip() | ||
.as(s.name)) | ||
.select((0, functions_1.col)(s.name).str.strip().as(s.name)) | ||
.getColumn(s.name); | ||
}, | ||
strptime(dtype, fmt) { | ||
const s = (0, series_1._Series)(_s); | ||
const s = (0, _1._Series)(_s); | ||
return s | ||
.toFrame() | ||
.select((0, functions_1.col)(s.name) | ||
.str | ||
.strptime(dtype, fmt) | ||
.as(s.name)) | ||
.select((0, functions_1.col)(s.name).str.strptime(dtype, fmt).as(s.name)) | ||
.getColumn(s.name); | ||
@@ -118,2 +115,2 @@ }, | ||
}; | ||
exports.StringFunctions = StringFunctions; | ||
exports.SeriesStringFunctions = SeriesStringFunctions; |
import { DataFrame } from "../dataframe"; | ||
import { Series } from "./series"; | ||
import { Series } from "."; | ||
export interface SeriesStructFunctions { | ||
@@ -4,0 +4,0 @@ fields: string[]; |
@@ -19,11 +19,13 @@ "use strict"; | ||
field(name) { | ||
return (0, dataframe_1.DataFrame)({}).select((0, expr_1._Expr)(polars_internal_1.default.lit(_s).structFieldByName(name))) | ||
return (0, dataframe_1.DataFrame)({}) | ||
.select((0, expr_1._Expr)(polars_internal_1.default.lit(_s).structFieldByName(name))) | ||
.toSeries(); | ||
}, | ||
renameFields(names) { | ||
return (0, dataframe_1.DataFrame)({}).select((0, expr_1._Expr)(polars_internal_1.default.lit(_s).structRenameFields(names))) | ||
return (0, dataframe_1.DataFrame)({}) | ||
.select((0, expr_1._Expr)(polars_internal_1.default.lit(_s).structRenameFields(names))) | ||
.toSeries(); | ||
} | ||
}, | ||
}; | ||
}; | ||
exports.SeriesStructFunctions = SeriesStructFunctions; |
/// <reference types="node" /> | ||
import { ColumnsOrExpr } from "./utils"; | ||
import { Expr } from "./lazy/expr"; | ||
export declare type RollingOptions = { | ||
windowSize: number; | ||
weights?: Array<number>; | ||
minPeriods?: number; | ||
center?: boolean; | ||
}; | ||
export declare type Interpolation = "nearest" | "higher" | "lower" | "midpoint" | "linear"; | ||
import { InterpolationMethod, RollingOptions, RollingQuantileOptions, RollingSkewOptions } from "./types"; | ||
import { DataType } from "./datatypes"; | ||
/** | ||
* Arithmetic operations | ||
*/ | ||
export interface Arithmetic<T> { | ||
add(rhs: any): T; | ||
sub(rhs: any): T; | ||
div(rhs: any): T; | ||
mul(rhs: any): T; | ||
rem(rhs: any): T; | ||
plus(rhs: any): T; | ||
minus(rhs: any): T; | ||
divideBy(rhs: any): T; | ||
multiplyBy(rhs: any): T; | ||
modulo(rhs: any): T; | ||
/** | ||
* Add self to other | ||
* @category Arithmetic | ||
*/ | ||
add(other: any): T; | ||
/** | ||
* Subtract other from self | ||
* @category Arithmetic | ||
*/ | ||
sub(other: any): T; | ||
/** | ||
* Divide self by other | ||
* @category Arithmetic | ||
*/ | ||
div(other: any): T; | ||
/** | ||
* Multiply self by other | ||
* @category Arithmetic | ||
*/ | ||
mul(other: any): T; | ||
/** | ||
* Get the remainder of self divided by other | ||
* @category Arithmetic | ||
*/ | ||
rem(other: any): T; | ||
/** | ||
* Add self to other | ||
* @category Arithmetic | ||
*/ | ||
plus(other: any): T; | ||
/** | ||
* Subtract other from self | ||
* @category Arithmetic | ||
*/ | ||
minus(other: any): T; | ||
/** | ||
* Divide self by other | ||
* @category Arithmetic | ||
*/ | ||
divideBy(other: any): T; | ||
/** | ||
* Multiply self by other | ||
* @category Arithmetic | ||
*/ | ||
multiplyBy(other: any): T; | ||
/** | ||
* Get the remainder of self divided by other | ||
* @category Arithmetic | ||
*/ | ||
modulo(other: any): T; | ||
} | ||
export interface Comparison<T> { | ||
eq(rhs: any): T; | ||
equals(rhs: any): T; | ||
gtEq(rhs: any): T; | ||
greaterThanEquals(rhs: any): T; | ||
gt(rhs: any): T; | ||
greaterThan(rhs: any): T; | ||
ltEq(rhs: any): T; | ||
lessThanEquals(rhs: any): T; | ||
lt(rhs: any): T; | ||
lessThan(rhs: any): T; | ||
neq(rhs: any): T; | ||
notEquals(rhs: any): T; | ||
/** | ||
* Compare self to other: `self == other` | ||
* @category Comparison | ||
*/ | ||
eq(other: any): T; | ||
/** | ||
* Compare self to other: `self == other` | ||
* @category Comparison | ||
*/ | ||
equals(other: any): T; | ||
/** | ||
* Compare self to other: `self >= other` | ||
* @category Comparison | ||
*/ | ||
gtEq(other: any): T; | ||
/** | ||
* Compare self to other: `self >= other` | ||
* @category Comparison | ||
*/ | ||
greaterThanEquals(other: any): T; | ||
/** | ||
* Compare self to other: `self > other` | ||
* @category Comparison | ||
*/ | ||
gt(other: any): T; | ||
/** | ||
* Compare self to other: `self > other` | ||
* @category Comparison | ||
*/ | ||
greaterThan(other: any): T; | ||
/** | ||
* Compare self to other: `self <= other` | ||
* @category Comparison | ||
*/ | ||
ltEq(other: any): T; | ||
/** | ||
* Compare self to other: `self =< other` | ||
* @category Comparison | ||
*/ | ||
lessThanEquals(other: any): T; | ||
/** | ||
* Compare self to other: `self < other` | ||
* @category Comparison | ||
*/ | ||
lt(other: any): T; | ||
/** | ||
* Compare self to other: `self < other` | ||
* @category Comparison | ||
*/ | ||
lessThan(other: any): T; | ||
/** | ||
* Compare self to other: `self !== other` | ||
* @category Comparison | ||
*/ | ||
neq(other: any): T; | ||
/** | ||
* Compare self to other: `self !== other` | ||
* @category Comparison | ||
*/ | ||
notEquals(other: any): T; | ||
} | ||
/** | ||
* A trait for cumulative operations. | ||
*/ | ||
export interface Cumulative<T> { | ||
/** Get an array with the cumulative count computed at every element. */ | ||
/** | ||
* Get an array with the cumulative count computed at every element. | ||
* @category Cumulative | ||
*/ | ||
cumCount(reverse?: boolean): T; | ||
@@ -49,4 +141,4 @@ cumCount({ reverse }: { | ||
* ``` | ||
* > const s = pl.Series("a", [1, 2, 3]) | ||
* > s.cumMax() | ||
* > const s = pl.Series("a", [1, 2, 3]) | ||
* > s.cumMax() | ||
* shape: (3,) | ||
@@ -60,2 +152,3 @@ * Series: 'b' [i64] | ||
* ``` | ||
* @category Cumulative | ||
*/ | ||
@@ -72,4 +165,4 @@ cumMax(reverse?: boolean): T; | ||
* ``` | ||
* > const s = pl.Series("a", [1, 2, 3]) | ||
* > s.cumMin() | ||
* > const s = pl.Series("a", [1, 2, 3]) | ||
* > s.cumMin() | ||
* shape: (3,) | ||
@@ -83,2 +176,3 @@ * Series: 'b' [i64] | ||
* ``` | ||
* @category Cumulative | ||
*/ | ||
@@ -95,4 +189,4 @@ cumMin(reverse?: boolean): T; | ||
* ``` | ||
* > const s = pl.Series("a", [1, 2, 3]) | ||
* > s.cumProd() | ||
* > const s = pl.Series("a", [1, 2, 3]) | ||
* > s.cumProd() | ||
* shape: (3,) | ||
@@ -106,2 +200,3 @@ * Series: 'b' [i64] | ||
* ``` | ||
* @category Cumulative | ||
*/ | ||
@@ -118,4 +213,4 @@ cumProd(reverse?: boolean): T; | ||
* ``` | ||
* > const s = pl.Series("a", [1, 2, 3]) | ||
* > s.cumSum() | ||
* > const s = pl.Series("a", [1, 2, 3]) | ||
* > s.cumSum() | ||
* shape: (3,) | ||
@@ -129,2 +224,3 @@ * Series: 'b' [i64] | ||
* ``` | ||
* @category Cumulative | ||
*/ | ||
@@ -136,2 +232,5 @@ cumSum(reverse?: boolean): T; | ||
} | ||
/** | ||
* __A trait for DataFrame and Series that allows for the application of a rolling window.__ | ||
*/ | ||
export interface Rolling<T> { | ||
@@ -152,2 +251,3 @@ /** | ||
* @param center - Set the labels at the center of the window | ||
* @category Rolling | ||
*/ | ||
@@ -170,2 +270,3 @@ rollingMax(options: RollingOptions): T; | ||
* @param center - Set the labels at the center of the window | ||
* @category Rolling | ||
*/ | ||
@@ -188,2 +289,3 @@ rollingMean(options: RollingOptions): T; | ||
* @param center - Set the labels at the center of the window | ||
* @category Rolling | ||
*/ | ||
@@ -205,2 +307,3 @@ rollingMin(options: RollingOptions): T; | ||
* @param center - Set the labels at the center of the window | ||
* @category Rolling | ||
*/ | ||
@@ -223,2 +326,3 @@ rollingStd(options: RollingOptions): T; | ||
* @param center - Set the labels at the center of the window | ||
* @category Rolling | ||
*/ | ||
@@ -241,6 +345,10 @@ rollingSum(options: RollingOptions): T; | ||
* @param center - Set the labels at the center of the window | ||
* @category Rolling | ||
*/ | ||
rollingVar(options: RollingOptions): T; | ||
rollingVar(windowSize: number, weights?: Array<number>, minPeriods?: Array<number>, center?: boolean): T; | ||
/** Compute a rolling median */ | ||
/** | ||
* Compute a rolling median | ||
* @category Rolling | ||
*/ | ||
rollingMedian(options: RollingOptions): T; | ||
@@ -258,8 +366,6 @@ rollingMedian(windowSize: number, weights?: Array<number>, minPeriods?: Array<number>, center?: boolean): T; | ||
* @param center - Set the labels at the center of the window | ||
* @category Rolling | ||
*/ | ||
rollingQuantile(options: RollingOptions & { | ||
quantile: number; | ||
interpolation?: Interpolation; | ||
}): T; | ||
rollingQuantile(quantile: number, interpolation?: Interpolation, windowSize?: number, weights?: Array<number>, minPeriods?: Array<number>, center?: boolean): T; | ||
rollingQuantile(options: RollingQuantileOptions): T; | ||
rollingQuantile(quantile: number, interpolation?: InterpolationMethod, windowSize?: number, weights?: Array<number>, minPeriods?: Array<number>, center?: boolean): T; | ||
/** | ||
@@ -269,8 +375,13 @@ * Compute a rolling skew | ||
* @param bias If false, then the calculations are corrected for statistical bias. | ||
* @category Rolling | ||
*/ | ||
rollingSkew(windowSize: number, bias?: boolean): T; | ||
rollingSkew({ windowSize, bias }: { | ||
windowSize: number; | ||
bias?: boolean; | ||
}): T; | ||
/** | ||
* Compute a rolling skew | ||
* @param options | ||
* @param options.windowSize Size of the rolling window | ||
* @param options.bias If false, then the calculations are corrected for statistical bias. | ||
* @category Rolling | ||
*/ | ||
rollingSkew(options: RollingSkewOptions): T; | ||
} | ||
@@ -283,2 +394,3 @@ export interface Round<T> { | ||
* @param decimals number of decimals to round by. | ||
* @category Math | ||
*/ | ||
@@ -292,2 +404,3 @@ round(decimals: number): T; | ||
* Only works on floating point Series | ||
* @category Math | ||
*/ | ||
@@ -298,2 +411,3 @@ floor(): T; | ||
* Only works on floating point Series | ||
* @category Math | ||
*/ | ||
@@ -307,2 +421,3 @@ ceil(): T; | ||
* @param max Maximum value | ||
* @category Math | ||
*/ | ||
@@ -324,8 +439,8 @@ clip(min: number, max: number): T; | ||
* ``` | ||
* >>> df = pl.DataFrame({ | ||
* >>> "foo": [1, 2, 3], | ||
* >>> "bar": [6, 7, 8], | ||
* >>> "ham": ['a', 'b', 'c'] | ||
* >>> }) | ||
* >>> df.sample({n: 2}) | ||
* > df = pl.DataFrame({ | ||
* > "foo": [1, 2, 3], | ||
* > "bar": [6, 7, 8], | ||
* > "ham": ['a', 'b', 'c'] | ||
* > }) | ||
* > df.sample({n: 2}) | ||
* shape: (2, 3) | ||
@@ -342,2 +457,3 @@ * ╭─────┬─────┬─────╮ | ||
* ``` | ||
* @category Math | ||
*/ | ||
@@ -360,2 +476,5 @@ sample(opts?: { | ||
} | ||
/** | ||
* Functions that can be applied to dtype List | ||
*/ | ||
export interface ListFunctions<T> { | ||
@@ -386,2 +505,3 @@ argMin(): T; | ||
* ``` | ||
* @category List | ||
*/ | ||
@@ -410,2 +530,3 @@ concat(other: (string | T)[] | string | T): T; | ||
* ``` | ||
* @category List | ||
*/ | ||
@@ -428,2 +549,3 @@ contains(item: any): T; | ||
* ``` | ||
* @category List | ||
*/ | ||
@@ -436,41 +558,45 @@ diff(n?: number, nullBehavior?: "ignore" | "drop"): T; | ||
* if an index is out of bounds, it will return a `null`. | ||
* @category List | ||
*/ | ||
get(index: number | Expr): T; | ||
/** | ||
Run any polars expression against the lists' elements | ||
Parameters | ||
---------- | ||
@param expr | ||
Expression to run. Note that you can select an element with `pl.first()`, or `pl.col()` | ||
@param parallel | ||
Run all expression parallel. Don't activate this blindly. | ||
Parallelism is worth it if there is enough work to do per thread. | ||
This likely should not be use in the groupby context, because we already parallel execution per group | ||
@example | ||
-------- | ||
>>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) | ||
>>> df.withColumn( | ||
... pl.concatList(["a", "b"]).lst.eval(pl.first().rank()).alias("rank") | ||
... ) | ||
shape: (3, 3) | ||
┌─────┬─────┬────────────┐ | ||
│ a ┆ b ┆ rank │ | ||
│ --- ┆ --- ┆ --- │ | ||
│ i64 ┆ i64 ┆ list [f32] │ | ||
╞═════╪═════╪════════════╡ | ||
│ 1 ┆ 4 ┆ [1.0, 2.0] │ | ||
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
│ 8 ┆ 5 ┆ [2.0, 1.0] │ | ||
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
│ 3 ┆ 2 ┆ [2.0, 1.0] │ | ||
└─────┴─────┴────────────┘ | ||
* Run any polars expression against the lists' elements | ||
* Parameters | ||
* ---------- | ||
* @param expr | ||
* Expression to run. Note that you can select an element with `pl.first()`, or `pl.col()` | ||
* @param parallel | ||
* Run all expression parallel. Don't activate this blindly. | ||
* Parallelism is worth it if there is enough work to do per thread. | ||
* This likely should not be use in the groupby context, because we already parallel execution per group | ||
* @example | ||
* >df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) | ||
* >df.withColumn( | ||
* ... pl.concatList(["a", "b"]).lst.eval(pl.first().rank()).alias("rank") | ||
* ... ) | ||
* shape: (3, 3) | ||
* ┌─────┬─────┬────────────┐ | ||
* │ a ┆ b ┆ rank │ | ||
* │ --- ┆ --- ┆ --- │ | ||
* │ i64 ┆ i64 ┆ list [f32] │ | ||
* ╞═════╪═════╪════════════╡ | ||
* │ 1 ┆ 4 ┆ [1.0, 2.0] │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 8 ┆ 5 ┆ [2.0, 1.0] │ | ||
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤ | ||
* │ 3 ┆ 2 ┆ [2.0, 1.0] │ | ||
* └─────┴─────┴────────────┘ | ||
* @category List | ||
*/ | ||
eval(expr: Expr, parallel: boolean): T; | ||
/** Get the first value of the sublists. */ | ||
eval(expr: Expr, parallel?: boolean): T; | ||
/** | ||
* Get the first value of the sublists. | ||
* @category List | ||
*/ | ||
first(): T; | ||
/** | ||
* Slice the head of every sublist | ||
* @param n How many values to take in the slice. | ||
* @param n - How many values to take in the slice. | ||
* @example | ||
* -------- | ||
* ``` | ||
* s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]]) | ||
@@ -484,2 +610,4 @@ * s.lst.head(2) | ||
* ] | ||
* ``` | ||
* @category List | ||
*/ | ||
@@ -489,5 +617,5 @@ head(n: number): T; | ||
* Slice the tail of every sublist | ||
* @param n How many values to take in the slice. | ||
* @param n - How many values to take in the slice. | ||
* @example | ||
* -------- | ||
* ``` | ||
* s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]]) | ||
@@ -501,2 +629,4 @@ * s.lst.tail(2) | ||
* ] | ||
* ``` | ||
* @category List | ||
*/ | ||
@@ -509,13 +639,53 @@ tail(n: number): T; | ||
* If omitted, the list elements are separated with a comma. | ||
* @category List | ||
*/ | ||
join(separator?: string): T; | ||
/** Get the last value of the sublists. */ | ||
/** | ||
* Get the last value of the sublists. | ||
* @category List | ||
*/ | ||
last(): T; | ||
/** | ||
* Get the length of the sublists. | ||
* @category List | ||
*/ | ||
lengths(): T; | ||
/** | ||
* Get the maximum value of the sublists. | ||
* @category List | ||
*/ | ||
max(): T; | ||
/** | ||
* Get the mean value of the sublists. | ||
* @category List | ||
*/ | ||
mean(): T; | ||
/** | ||
* Get the median value of the sublists. | ||
* @category List | ||
*/ | ||
min(): T; | ||
/** | ||
* Reverse the sublists. | ||
* @category List | ||
*/ | ||
reverse(): T; | ||
/** | ||
* Shift the sublists. | ||
* @param periods - Number of periods to shift. Can be positive or negative. | ||
* @category List | ||
*/ | ||
shift(periods: number): T; | ||
/** | ||
* Slice the sublists. | ||
* @param offset - The offset of the slice. | ||
* @param length - The length of the slice. | ||
* @category List | ||
*/ | ||
slice(offset: number, length: number): T; | ||
/** | ||
* Sort the sublists. | ||
* @param reverse - Sort in reverse order. | ||
* @category List | ||
*/ | ||
sort(reverse?: boolean): T; | ||
@@ -525,5 +695,16 @@ sort(opt: { | ||
}): T; | ||
/** | ||
* Sum all elements of the sublists. | ||
* @category List | ||
*/ | ||
sum(): T; | ||
/** | ||
* Get the unique values of the sublists. | ||
* @category List | ||
*/ | ||
unique(): T; | ||
} | ||
/** | ||
* Functions that can be applied to a Date or Datetime column. | ||
*/ | ||
export interface DateFunctions<T> { | ||
@@ -622,2 +803,174 @@ /** | ||
} | ||
export interface StringFunctions<T> { | ||
/** | ||
* Vertically concat the values in the Series to a single string value. | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({"foo": [1, null, 2]}) | ||
* > df = df.select(pl.col("foo").str.concat("-")) | ||
* > df | ||
* shape: (1, 1) | ||
* ┌──────────┐ | ||
* │ foo │ | ||
* │ --- │ | ||
* │ str │ | ||
* ╞══════════╡ | ||
* │ 1-null-2 │ | ||
* └──────────┘ | ||
* ``` | ||
*/ | ||
concat(delimiter: string): T; | ||
/** Check if strings in Series contain regex pattern. */ | ||
contains(pat: string | RegExp): T; | ||
/** | ||
* Decodes a value using the provided encoding | ||
* @param encoding - hex | base64 | ||
* @param strict - how to handle invalid inputs | ||
* | ||
* - true: method will throw error if unable to decode a value | ||
* - false: unhandled values will be replaced with `null` | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({"strings": ["666f6f", "626172", null]}) | ||
* > df.select(col("strings").str.decode("hex")) | ||
* shape: (3, 1) | ||
* ┌─────────┐ | ||
* │ strings │ | ||
* │ --- │ | ||
* │ str │ | ||
* ╞═════════╡ | ||
* │ foo │ | ||
* ├╌╌╌╌╌╌╌╌╌┤ | ||
* │ bar │ | ||
* ├╌╌╌╌╌╌╌╌╌┤ | ||
* │ null │ | ||
* └─────────┘ | ||
* ``` | ||
*/ | ||
decode(encoding: "hex" | "base64", strict?: boolean): T; | ||
decode(options: { | ||
encoding: "hex" | "base64"; | ||
strict?: boolean; | ||
}): T; | ||
/** | ||
* Encodes a value using the provided encoding | ||
* @param encoding - hex | base64 | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({"strings", ["foo", "bar", null]}) | ||
* > df.select(col("strings").str.encode("hex")) | ||
* shape: (3, 1) | ||
* ┌─────────┐ | ||
* │ strings │ | ||
* │ --- │ | ||
* │ str │ | ||
* ╞═════════╡ | ||
* │ 666f6f │ | ||
* ├╌╌╌╌╌╌╌╌╌┤ | ||
* │ 626172 │ | ||
* ├╌╌╌╌╌╌╌╌╌┤ | ||
* │ null │ | ||
* └─────────┘ | ||
* ``` | ||
*/ | ||
encode(encoding: "hex" | "base64"): T; | ||
/** | ||
* Extract the target capture group from provided patterns. | ||
* @param pattern A valid regex pattern | ||
* @param groupIndex Index of the targeted capture group. | ||
* Group 0 mean the whole pattern, first group begin at index 1 | ||
* Default to the first capture group | ||
* @returns Utf8 array. Contain null if original value is null or regex capture nothing. | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... 'a': [ | ||
* ... 'http://vote.com/ballon_dor?candidate=messi&ref=polars', | ||
* ... 'http://vote.com/ballon_dor?candidat=jorginho&ref=polars', | ||
* ... 'http://vote.com/ballon_dor?candidate=ronaldo&ref=polars' | ||
* ... ]}) | ||
* > df.select(pl.col('a').str.extract(/candidate=(\w+)/, 1)) | ||
* shape: (3, 1) | ||
* ┌─────────┐ | ||
* │ a │ | ||
* │ --- │ | ||
* │ str │ | ||
* ╞═════════╡ | ||
* │ messi │ | ||
* ├╌╌╌╌╌╌╌╌╌┤ | ||
* │ null │ | ||
* ├╌╌╌╌╌╌╌╌╌┤ | ||
* │ ronaldo │ | ||
* └─────────┘ | ||
* ``` | ||
*/ | ||
extract(pat: string | RegExp, groupIndex: number): T; | ||
/** | ||
* Extract the first match of json string with provided JSONPath expression. | ||
* Throw errors if encounter invalid json strings. | ||
* All return value will be casted to Utf8 regardless of the original value. | ||
* @see https://goessner.net/articles/JsonPath/ | ||
* @param jsonPath - A valid JSON path query string | ||
* @returns Utf8 array. Contain null if original value is null or the `jsonPath` return nothing. | ||
* @example | ||
* ``` | ||
* > df = pl.DataFrame({ | ||
* ... 'json_val': [ | ||
* ... '{"a":"1"}', | ||
* ... null, | ||
* ... '{"a":2}', | ||
* ... '{"a":2.1}', | ||
* ... '{"a":true}' | ||
* ... ] | ||
* ... }) | ||
* > df.select(pl.col('json_val').str.jsonPathMatch('$.a') | ||
* shape: (5,) | ||
* Series: 'json_val' [str] | ||
* [ | ||
* "1" | ||
* null | ||
* "2" | ||
* "2.1" | ||
* "true" | ||
* ] | ||
* ``` | ||
*/ | ||
jsonPathMatch(pat: string): T; | ||
/** Get length of the string values in the Series. */ | ||
lengths(): T; | ||
/** Remove leading whitespace. */ | ||
lstrip(): T; | ||
/** Replace first regex match with a string value. */ | ||
replace(pat: string | RegExp, val: string): T; | ||
/** Replace all regex matches with a string value. */ | ||
replaceAll(pat: string | RegExp, val: string): T; | ||
/** Modify the strings to their lowercase equivalent. */ | ||
toLowerCase(): T; | ||
/** Modify the strings to their uppercase equivalent. */ | ||
toUpperCase(): T; | ||
/** Remove trailing whitespace. */ | ||
rstrip(): T; | ||
/** | ||
* Create subslices of the string values of a Utf8 Series. | ||
* @param start - Start of the slice (negative indexing may be used). | ||
* @param length - Optional length of the slice. | ||
*/ | ||
slice(start: number, length?: number): T; | ||
/** | ||
* Split a string into substrings using the specified separator and return them as a Series. | ||
* @param separator — A string that identifies character or characters to use in separating the string. | ||
* @param inclusive Include the split character/string in the results | ||
*/ | ||
split(by: string, options?: { | ||
inclusive?: boolean; | ||
} | boolean): T; | ||
/** Remove leading and trailing whitespace. */ | ||
strip(): T; | ||
/** | ||
* Parse a Series of dtype Utf8 to a Date/Datetime Series. | ||
* @param datatype Date or Datetime. | ||
* @param fmt formatting syntax. [Read more](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html) | ||
*/ | ||
strptime(datatype: DataType.Date | DataType.Datetime, fmt?: string): T; | ||
} | ||
export interface Serialize { | ||
@@ -641,2 +994,5 @@ /** | ||
} | ||
/** | ||
* GroupBy operations that can be applied to a DataFrame or LazyFrame. | ||
*/ | ||
export interface GroupByOps<T> { | ||
@@ -689,3 +1045,3 @@ /** | ||
>>> dates = [ | ||
>dates = [ | ||
... "2020-01-01 13:45:48", | ||
@@ -698,6 +1054,6 @@ ... "2020-01-01 16:42:13", | ||
... ] | ||
>>> df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).withColumn( | ||
>df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).withColumn( | ||
... pl.col("dt").str.strptime(pl.Datetime) | ||
... ) | ||
>>> out = df.groupbyRolling({indexColumn:"dt", period:"2d"}).agg( | ||
>out = df.groupbyRolling({indexColumn:"dt", period:"2d"}).agg( | ||
... [ | ||
@@ -709,6 +1065,6 @@ ... pl.sum("a").alias("sum_a"), | ||
... ) | ||
>>> assert(out["sum_a"].toArray() === [3, 10, 15, 24, 11, 1]) | ||
>>> assert(out["max_a"].toArray() === [3, 7, 7, 9, 9, 1]) | ||
>>> assert(out["min_a"].toArray() === [3, 3, 3, 3, 2, 1]) | ||
>>> out | ||
>assert(out["sum_a"].toArray() === [3, 10, 15, 24, 11, 1]) | ||
>assert(out["max_a"].toArray() === [3, 7, 7, 9, 9, 1]) | ||
>assert(out["min_a"].toArray() === [3, 3, 3, 3, 2, 1]) | ||
>out | ||
shape: (6, 4) | ||
@@ -715,0 +1071,0 @@ ┌─────────────────────┬───────┬───────┬───────┐ |
import { Expr } from "./lazy/expr"; | ||
import { Series } from "./series/series"; | ||
import { Series } from "./series"; | ||
import { DataFrame } from "./dataframe"; | ||
export declare type ValueOrArray<T> = T | Array<ValueOrArray<T>>; | ||
export declare type ColumnSelection = ValueOrArray<string>; | ||
export declare type ExpressionSelection = ValueOrArray<Expr>; | ||
export declare type ColumnsOrExpr = ColumnSelection | ExpressionSelection; | ||
export declare type ExprOrString = Expr | string; | ||
export declare type DownsampleRule = "month" | "week" | "day" | "hour" | "minute" | "second"; | ||
export declare type FillNullStrategy = "backward" | "forward" | "mean" | "min" | "max" | "zero" | "one"; | ||
export declare type RankMethod = "average" | "min" | "max" | "dense" | "ordinal" | "random"; | ||
export declare type RollingOptions = { | ||
windowSize: number; | ||
weights?: Array<number>; | ||
minPeriods?: number; | ||
center?: boolean; | ||
}; | ||
/** @ignore */ | ||
export type ValueOrArray<T> = T | Array<ValueOrArray<T>>; | ||
/** @ignore */ | ||
export type ColumnSelection = ValueOrArray<string>; | ||
/** @ignore */ | ||
export type ExpressionSelection = ValueOrArray<Expr>; | ||
/** @ignore */ | ||
export type ColumnsOrExpr = ColumnSelection | ExpressionSelection; | ||
/** @ignore */ | ||
export type ExprOrString = Expr | string; | ||
/** @ignore */ | ||
export declare function columnOrColumns(columns: ColumnSelection | string | Array<string> | undefined): Array<string> | undefined; | ||
/** @ignore */ | ||
export declare function columnOrColumnsStrict(...columns: string[] | ValueOrArray<string>[]): Array<string>; | ||
/** @ignore */ | ||
export declare function selectionToExprList(columns: any[], stringToLit?: any): any[]; | ||
/** @ignore */ | ||
export declare function isPath(s: string, expectedExtensions?: string[]): boolean; | ||
@@ -22,0 +22,0 @@ export declare const range: (start: number, end: number) => number[]; |
@@ -8,6 +8,7 @@ "use strict"; | ||
const expr_1 = require("./lazy/expr"); | ||
const series_1 = require("./series/series"); | ||
const series_1 = require("./series"); | ||
const dataframe_1 = require("./dataframe"); | ||
const path_1 = __importDefault(require("path")); | ||
const types_1 = require("util/types"); | ||
/** @ignore */ | ||
function columnOrColumns(columns) { | ||
@@ -19,2 +20,3 @@ if (columns) { | ||
exports.columnOrColumns = columnOrColumns; | ||
/** @ignore */ | ||
function columnOrColumnsStrict(...columns) { | ||
@@ -24,9 +26,13 @@ return columns.flat(3); | ||
exports.columnOrColumnsStrict = columnOrColumnsStrict; | ||
/** @ignore */ | ||
function selectionToExprList(columns, stringToLit) { | ||
return [columns].flat(3).map(expr => (0, expr_1.exprToLitOrExpr)(expr, stringToLit)._expr); | ||
return [columns] | ||
.flat(3) | ||
.map((expr) => (0, expr_1.exprToLitOrExpr)(expr, stringToLit)._expr); | ||
} | ||
exports.selectionToExprList = selectionToExprList; | ||
/** @ignore */ | ||
function isPath(s, expectedExtensions) { | ||
const { base, ext, name } = path_1.default.parse(s); | ||
return Boolean(base && ext && name) && !!(expectedExtensions?.includes(ext)); | ||
return Boolean(base && ext && name) && !!expectedExtensions?.includes(ext); | ||
} | ||
@@ -33,0 +39,0 @@ exports.isPath = isPath; |
{ | ||
"name": "nodejs-polars", | ||
"version": "0.7.2", | ||
"version": "0.7.3", | ||
"repository": "https://github.com/pola-rs/nodejs-polars.git", | ||
@@ -27,3 +27,6 @@ "license": "SEE LICENSE IN LICENSE", | ||
"aarch64-unknown-linux-gnu", | ||
"i686-pc-windows-msvc" | ||
"aarch64-unknown-linux-musl", | ||
"aarch64-linux-android", | ||
"i686-pc-windows-msvc", | ||
"x86_64-unknown-linux-musl" | ||
] | ||
@@ -47,32 +50,23 @@ } | ||
"format:rs": "cargo fmt", | ||
"format:source": "prettier --config ./package.json --write './**/*.{js,ts}'", | ||
"format:yaml": "prettier --parser yaml --write './**/*.{yml,yaml}'", | ||
"lint:ts": "eslint -c ./.eslintrc.json 'polars/**/*.{ts,tsx,js}' '__tests__/*.ts'", | ||
"lint:ts:fix": "rome check --apply-suggested {polars,__tests__} && rome format --write {polars,__tests__}", | ||
"lint:ts": "rome check {polars,__tests__} && rome format {polars,__tests__}", | ||
"lint": "yarn lint:ts && yarn format:rs", | ||
"prepublishOnly": "napi prepublish -t npm", | ||
"test": "jest", | ||
"version": "napi version" | ||
"version": "napi version", | ||
"precommit": "yarn lint && yarn test" | ||
}, | ||
"devDependencies": { | ||
"@napi-rs/cli": "^2.13.3", | ||
"@napi-rs/cli": "^2.14.1", | ||
"@types/chance": "^1.1.3", | ||
"@types/jest": "^27.0.3", | ||
"@types/node": "^16.11.9", | ||
"@typescript-eslint/eslint-plugin": "^5.4.0", | ||
"@typescript-eslint/parser": "^5.4.0", | ||
"chance": "^1.1.8", | ||
"eslint": "^8.1.0", | ||
"eslint-config-prettier": "^8.3.0", | ||
"eslint-plugin-import": "^2.25.3", | ||
"eslint-plugin-jest": "^25.2.4", | ||
"eslint-plugin-node": "^11.1.0", | ||
"eslint-plugin-prettier": "^4.0.0", | ||
"husky": "^7.0.4", | ||
"jest": "^27.3.1", | ||
"lint-staged": "^11.2.6", | ||
"prettier": "^2.4.1", | ||
"rome": "^11.0.0", | ||
"source-map-support": "^0.5.21", | ||
"ts-jest": "^27.1.0", | ||
"ts-node": "^10.4.0", | ||
"typedoc": "^0.22.9", | ||
"typescript": "4.4.3" | ||
"typedoc": "^0.23", | ||
"typescript": "4.9" | ||
}, | ||
@@ -84,9 +78,12 @@ "packageManager": "yarn@3.3.1", | ||
"optionalDependencies": { | ||
"nodejs-polars-win32-x64-msvc": "0.7.2", | ||
"nodejs-polars-darwin-x64": "0.7.2", | ||
"nodejs-polars-linux-x64-gnu": "0.7.2", | ||
"nodejs-polars-darwin-arm64": "0.7.2", | ||
"nodejs-polars-linux-arm64-gnu": "0.7.2", | ||
"nodejs-polars-win32-ia32-msvc": "0.7.2" | ||
"nodejs-polars-win32-x64-msvc": "0.7.3", | ||
"nodejs-polars-darwin-x64": "0.7.3", | ||
"nodejs-polars-linux-x64-gnu": "0.7.3", | ||
"nodejs-polars-darwin-arm64": "0.7.3", | ||
"nodejs-polars-linux-arm64-gnu": "0.7.3", | ||
"nodejs-polars-linux-arm64-musl": "0.7.3", | ||
"nodejs-polars-android-arm64": "0.7.3", | ||
"nodejs-polars-win32-ia32-msvc": "0.7.3", | ||
"nodejs-polars-linux-x64-musl": "0.7.3" | ||
} | ||
} |
@@ -24,4 +24,4 @@ # Polars | ||
```js | ||
>>> const fooSeries = pl.Series("foo", [1, 2, 3]) | ||
>>> fooSeries.sum() | ||
> const fooSeries = pl.Series("foo", [1, 2, 3]) | ||
> fooSeries.sum() | ||
6 | ||
@@ -31,4 +31,4 @@ | ||
// you can see the full specs in the docs or the type definitions | ||
>>> fooSeries.sort(true) | ||
>>> fooSeries.sort({reverse: true}) | ||
> fooSeries.sort(true) | ||
> fooSeries.sort({reverse: true}) | ||
shape: (3,) | ||
@@ -41,10 +41,10 @@ Series: 'foo' [f64] | ||
] | ||
>>> fooSeries.toArray() | ||
> fooSeries.toArray() | ||
[1, 2, 3] | ||
// Series are 'Iterables' so you can use javascript iterable syntax on them | ||
>>> [...fooSeries] | ||
> [...fooSeries] | ||
[1, 2, 3] | ||
>>> fooSeries[0] | ||
> fooSeries[0] | ||
1 | ||
@@ -57,3 +57,3 @@ | ||
```js | ||
>>> const df = pl.DataFrame( | ||
>const df = pl.DataFrame( | ||
... { | ||
@@ -66,5 +66,3 @@ ... A: [1, 2, 3, 4, 5], | ||
... ) | ||
>>> df | ||
... .sort("fruits") | ||
... .select( | ||
> df.sort("fruits").select( | ||
... "fruits", | ||
@@ -98,3 +96,3 @@ ... "cars", | ||
```js | ||
>>> df["cars"] // or df.getColumn("cars") | ||
> df["cars"] // or df.getColumn("cars") | ||
shape: (5,) | ||
@@ -146,3 +144,3 @@ Series: 'cars' [str] | ||
* Installation guide: `$ yarn install nodejs-polars` | ||
* [Node documentation](https://pola-rs.github.io/nodejs-polars/html/index.html) | ||
* [Node documentation](https://pola-rs.github.io/nodejs-polars/) | ||
* [User guide](https://pola-rs.github.io/polars-book/) | ||
@@ -149,0 +147,0 @@ |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
433484
12
11943
9
170