@tensorflow/tfjs-data - npm Package Compare versions

Comparing version 1.0.0-alpha1 to 1.0.0-alpha2

dist/dataset.d.ts

@@ -0,21 +1,350 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { LazyIterator } from './iterators/lazy_iterator';
import { DataElement, DatasetContainer } from './types';
/**
* Represents a potentially large list of independent data elements (typically
* 'samples' or 'examples').
*
* A 'data example' may be a primitive, an array, a map from string keys to
* values, or any nested structure of these.
*
* A `Dataset` represents an ordered collection of elements, together with a
* chain of transformations to be performed on those elements. Each
* transformation is a method of `Dataset` that returns another `Dataset`, so
* these may be chained, e.g.
* `const processedDataset = rawDataset.filter(...).map(...).batch(...)`.
*
* Data loading and transformation is done in a lazy, streaming fashion. The
* dataset may be iterated over multiple times; each iteration starts the data
* loading anew and recapitulates the transformations.
*
* A `Dataset` is typically processed as a stream of unbatched examples --i.e.,
* its transformations are applied one example at a time. Batching produces a
* new `Dataset` where each element is a batch. Batching should usually come
* last in a pipeline, because data transformations are easier to express on a
* per-example basis than on a per-batch basis.
*
* The code examples below call `await dataset.forEach(...)` to iterate
* once over the entire dataset and print out the data.
*/
/** @doc {heading: 'Data', subheading: 'Classes', namespace: 'data'} */
export declare abstract class Dataset<T extends DataElement> {
abstract iterator(): Promise<LazyIterator<T>>;
readonly size: number;
/**
* Groups elements into batches and arranges their values in columnar
* form.
*
* It is assumed that each of the incoming dataset elements has the same
* set of keys. For each key, the resulting `Dataset` provides a batched
* element collecting all of the incoming values for that key. Incoming
* strings are grouped into a string[]. Incoming Tensors are grouped into a
* new Tensor where the 0'th axis is the batch dimension. These columnar
* representations for each key can be zipped together to reconstruct the
* original dataset elements.
*
* Batch a dataset of numbers:
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6, 7, 8]).batch(4);
* await a.forEach(e => e.print());
* ```
*
* Batch a dataset of arrays:
* ```js
* const b = tf.data.array([[1], [2], [3], [4], [5], [6], [7], [8]]).batch(4);
* await b.forEach(e => e.print());
* ```
*
* Batch a dataset of objects:
* ```js
* const c = tf.data.array([{a: 1, b: 11}, {a: 2, b: 12}, {a: 3, b: 13},
* {a: 4, b: 14}, {a: 5, b: 15}, {a: 6, b: 16}, {a: 7, b: 17},
* {a: 8, b: 18}]).batch(4);
* await c.forEach(e => {
* console.log('{');
* for(var key in e) {
* console.log(key+':');
* e[key].print();
* }
* console.log('}');
* })
* ```
*
* @param batchSize The number of elements desired per batch.
* @param smallLastBatch Whether to emit the final batch when it has fewer
* than batchSize elements. Default true.
* @returns A `Dataset`, from which a stream of batches can be obtained.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
batch(batchSize: number, smallLastBatch?: boolean): Dataset<DataElement>;
/**
* Concatenates this `Dataset` with another.
*
* ```js
* const a = tf.data.array([1, 2, 3]);
* const b = tf.data.array([4, 5, 6]);
* const c = a.concatenate(b);
* await c.forEach(e => console.log(e));
* ```
*
* @param dataset A `Dataset` to be concatenated onto this one.
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
concatenate(dataset: Dataset<T>): Dataset<T>;
/**
* Filters this dataset according to `predicate`.
*
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
* .filter(x => x%2 === 0);
* await a.forEach(e => console.log(e));
* ```
*
* @param predicate A function mapping a dataset element to a boolean or a
* `Promise` for one.
*
* @returns A `Dataset` of elements for which the predicate was true.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
filter(predicate: (value: T) => boolean): Dataset<T>;
/**
* Apply a function to every element of the dataset.
*
* After the function is applied to a dataset element, any Tensors contained
* within that element are disposed.
*
* ```js
* const a = tf.data.array([1, 2, 3]);
* await a.forEach(e => console.log(e));
* ```
*
* @param f A function to apply to each dataset element.
* @returns A `Promise` that resolves after all elements have been processed.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
forEach(f: (input: T) => void): Promise<void>;
/**
* Maps this dataset through a 1-to-1 transform.
*
* ```js
* const a = tf.data.array([1, 2, 3]).map(x => x*x);
* await a.forEach(e => console.log(e));
* ```
*
* @param transform A function mapping a dataset element to a transformed
* dataset element.
*
* @returns A `Dataset` of transformed elements.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
map<O extends DataElement>(transform: (value: T) => O): Dataset<O>;
/**
* Maps this dataset through an async 1-to-1 transform.
*
* ```js
* const a = tf.data.array([1, 2, 3]).mapAsync(x => new Promise(function(resolve){
* resolve(x*x);
* }));
* await a.forEach(e => console.log(e));
* ```
*
* @param transform A function mapping a dataset element to a `Promise` for a
* transformed dataset element. This transform is responsible for disposing
* any intermediate `Tensor`s, e.g. by wrapping its computation in
* `tf.tidy()`; that cannot be automated here (as it is in the synchronous
* `map()` case).
*
* @returns A `Dataset` of transformed elements.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
mapAsync<O extends DataElement>(transform: (value: T) => Promise<O>): Dataset<O>;
/**
* Creates a `Dataset` that prefetches elements from this dataset.
*
* @param bufferSize: An integer specifying the number of elements to be
* prefetched.
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
prefetch(bufferSize: number): Dataset<T>;
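Unlike the neighboring methods, `prefetch` ships without an inline example; a minimal sketch using the documented `tf.data.array` (the buffer size is chosen arbitrarily):

```js
// Prefetch up to 2 elements ahead of the consumer while iterating.
const a = tf.data.array([1, 2, 3, 4, 5, 6]).prefetch(2);
await a.forEach(e => console.log(e));
```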
/**
* Repeats this dataset `count` times.
*
* NOTE: If this dataset is a function of global state (e.g. a random number
* generator), then different repetitions may produce different elements.
*
* ```js
* const a = tf.data.array([1, 2, 3]).repeat(3);
* await a.forEach(e => console.log(e));
* ```
*
* @param count: (Optional) An integer, representing the number of times
* the dataset should be repeated. The default behavior (if `count` is
* `undefined` or negative) is for the dataset to be repeated indefinitely.
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
repeat(count?: number): Dataset<T>;
/**
* Creates a `Dataset` that skips `count` initial elements from this dataset.
*
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6]).skip(3);
* await a.forEach(e => console.log(e));
* ```
*
* @param count: The number of elements of this dataset that should be skipped
* to form the new dataset. If `count` is greater than the size of this
* dataset, the new dataset will contain no elements. If `count`
* is `undefined` or negative, skips the entire dataset.
*
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
skip(count: number): Dataset<T>;
/**
* Pseudorandomly shuffles the elements of this dataset. This is done in a
* streaming manner, by sampling from a given number of prefetched elements.
*
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6]).shuffle(3);
* await a.forEach(e => console.log(e));
* ```
*
* @param bufferSize: An integer specifying the number of elements from this
* dataset from which the new dataset will sample.
* @param seed: (Optional) An integer specifying the random seed that will
* be used to create the distribution.
* @param reshuffleEachIteration: (Optional) A boolean, which if true
* indicates that the dataset should be pseudorandomly reshuffled each time
* it is iterated over. If false, elements will be returned in the same
* shuffled order on each iteration. (Defaults to `true`.)
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
shuffle(bufferSize: number, seed?: string, reshuffleEachIteration?: boolean): Dataset<T>;
/**
* Creates a `Dataset` with at most `count` initial elements from this
* dataset.
*
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6]).take(3);
* await a.forEach(e => console.log(e));
* ```
*
* @param count: The number of elements of this dataset that should be taken
* to form the new dataset. If `count` is `undefined` or negative, or if
* `count` is greater than the size of this dataset, the new dataset will
* contain all elements of this dataset.
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
take(count: number): Dataset<T>;
/**
* Collect all elements of this dataset into an array.
*
* This will succeed only for small datasets that fit in memory. It is
* useful for testing, but should generally be avoided if possible.
*
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6]);
* console.log(await a.toArray());
* ```
*
* @returns A Promise for an array of elements, which will resolve
* when a new stream has been obtained and fully consumed.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
toArray(): Promise<T[]>;
}
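A sketch of the chaining pattern described in the class-level docs above, composed from the documented `filter`, `map`, and `batch` (values are illustrative):

```js
const processed = tf.data.array([1, 2, 3, 4, 5, 6, 7, 8])
    .filter(x => x % 2 === 0)  // keep even numbers: 2, 4, 6, 8
    .map(x => x * 10)          // scale: 20, 40, 60, 80
    .batch(2);                 // group into Tensors of 2 elements each
await processed.forEach(e => e.print());
```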
/**
* Create a `Dataset` defined by a provided iterator() function.
*
* ```js
* let i = -1;
* const func = () =>
* ++i < 5 ? {value: i, done: false} : {value: null, done: true};
* const iter = tf.data.iteratorFromFunction(func);
* const ds = tf.data.datasetFromIteratorFn(async () => iter);
* await ds.forEach(e => console.log(e));
* ```
*/
export declare function datasetFromIteratorFn<T extends DataElement>(iteratorFn: () => Promise<LazyIterator<T>>, size?: number): Dataset<T>;
/**
* Create a `Dataset` from an array of elements.
*
* Create a Dataset from an array of objects:
* ```js
* const a = tf.data.array([{'item': 1}, {'item': 2}, {'item': 3}]);
* await a.forEach(e => console.log(e));
* ```
*
* Create a Dataset from an array of numbers:
* ```js
* const a = tf.data.array([4, 5, 6]);
* await a.forEach(e => console.log(e));
* ```
* @param items An array of elements that will be parsed as items in a dataset.
*/
/** @doc {heading: 'Data', subheading: 'Creation', namespace: 'data'} */
export declare function array<T extends DataElement>(items: T[]): Dataset<T>;
/**
* Create a `Dataset` by zipping together an array, dict, or nested
* structure of `Dataset`s (and perhaps additional constants).
* The underlying datasets must provide elements in a consistent order such that
* they correspond.
*
* The number of elements in the resulting dataset is the same as the size of
* the smallest dataset in datasets.
*
* The nested structure of the `datasets` argument determines the
* structure of elements in the resulting iterator.
*
* Note this means that, given an array of two datasets that produce dict
* elements, the result is a dataset that produces elements that are arrays
* of two dicts:
*
* Zip an array of datasets:
* ```js
* console.log('Zip two datasets of objects:');
* const ds1 = tf.data.array([{a: 1}, {a: 2}, {a: 3}]);
* const ds2 = tf.data.array([{b: 4}, {b: 5}, {b: 6}]);
* const ds3 = tf.data.zip([ds1, ds2]);
* await ds3.forEach(e => console.log(JSON.stringify(e)));
*
* // If the goal is to merge the dicts in order to produce elements like
* // {a: ..., b: ...}, this requires a second step such as:
* console.log('Merge the objects:');
* const ds4 = ds3.map(x => {return {a: x[0].a, b: x[1].b}});
* await ds4.forEach(e => console.log(e));
* ```
*
* Zip a dict of datasets:
* ```js
* const a = tf.data.array([{a: 1}, {a: 2}, {a: 3}]);
* const b = tf.data.array([{b: 4}, {b: 5}, {b: 6}]);
* const c = tf.data.zip({c: a, d: b});
* await c.forEach(e => console.log(JSON.stringify(e)));
* ```
*/
/** @doc {heading: 'Data', subheading: 'Operations', namespace: 'data'} */
export declare function zip<O extends DataElement>(datasets: DatasetContainer): Dataset<O>;

dist/dataset.js
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __extends = (this && this.__extends) || (function () {

@@ -52,6 +69,90 @@ var extendStatics = Object.setPrototypeOf ||

var deep_map_1 = require("./util/deep_map");
var Dataset = (function () {
// TODO(soergel): consider vectorized operations within the pipeline.
/**
* Represents a potentially large list of independent data elements (typically
* 'samples' or 'examples').
*
* A 'data example' may be a primitive, an array, a map from string keys to
* values, or any nested structure of these.
*
* A `Dataset` represents an ordered collection of elements, together with a
* chain of transformations to be performed on those elements. Each
* transformation is a method of `Dataset` that returns another `Dataset`, so
* these may be chained, e.g.
* `const processedDataset = rawDataset.filter(...).map(...).batch(...)`.
*
* Data loading and transformation is done in a lazy, streaming fashion. The
* dataset may be iterated over multiple times; each iteration starts the data
* loading anew and recapitulates the transformations.
*
* A `Dataset` is typically processed as a stream of unbatched examples --i.e.,
* its transformations are applied one example at a time. Batching produces a
* new `Dataset` where each element is a batch. Batching should usually come
* last in a pipeline, because data transformations are easier to express on a
* per-example basis than on a per-batch basis.
*
* The code examples below call `await dataset.forEach(...)` to iterate
* once over the entire dataset and print out the data.
*/
/** @doc {heading: 'Data', subheading: 'Classes', namespace: 'data'} */
var Dataset = /** @class */ (function () {
function Dataset() {
this.size = null;
/* TODO(soergel): for parity with tf.data:
Dataset.flat_map()
Dataset.dense_to_sparse_batch()
Dataset.group_by_window()
Dataset.padded_batch()
*/
}
// TODO(soergel): Make Datasets report whether repeated iterator() calls
// produce the same result (e.g., reading from a file) or different results
// (e.g., from the webcam). Currently we don't make this distinction but it
// could be important for the user to know.
// abstract isDeterministic(): boolean;
/**
* Groups elements into batches and arranges their values in columnar
* form.
*
* It is assumed that each of the incoming dataset elements has the same
* set of keys. For each key, the resulting `Dataset` provides a batched
* element collecting all of the incoming values for that key. Incoming
* strings are grouped into a string[]. Incoming Tensors are grouped into a
* new Tensor where the 0'th axis is the batch dimension. These columnar
* representations for each key can be zipped together to reconstruct the
* original dataset elements.
*
* Batch a dataset of numbers:
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6, 7, 8]).batch(4);
* await a.forEach(e => e.print());
* ```
*
* Batch a dataset of arrays:
* ```js
* const b = tf.data.array([[1], [2], [3], [4], [5], [6], [7], [8]]).batch(4);
* await b.forEach(e => e.print());
* ```
*
* Batch a dataset of objects:
* ```js
* const c = tf.data.array([{a: 1, b: 11}, {a: 2, b: 12}, {a: 3, b: 13},
* {a: 4, b: 14}, {a: 5, b: 15}, {a: 6, b: 16}, {a: 7, b: 17},
* {a: 8, b: 18}]).batch(4);
* await c.forEach(e => {
* console.log('{');
* for(var key in e) {
* console.log(key+':');
* e[key].print();
* }
* console.log('}');
* })
* ```
*
* @param batchSize The number of elements desired per batch.
* @param smallLastBatch Whether to emit the final batch when it has fewer
* than batchSize elements. Default true.
* @returns A `Dataset`, from which a stream of batches can be obtained.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.batch = function (batchSize, smallLastBatch) {

@@ -64,8 +165,14 @@ var _this = this;

if (this.size === Infinity || this.size == null) {
// If the size of this dataset is infinity or null, the new size stays
// the same.
size = this.size;
}
else if (smallLastBatch) {
// If the size of this dataset is known and the small last batch is
// included, the new size is the number of full batches plus one for the
// last partial batch.
size = Math.ceil(this.size / batchSize);
}
else {
// If the size of this dataset is known and the small last batch is
// excluded, the new size is the number of full batches.
size = Math.floor(this.size / batchSize);
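A worked instance of the size rule above, with illustrative numbers:

```js
// 8 elements with batchSize 3:
//   smallLastBatch === true  -> Math.ceil(8 / 3) === 3 batches (3, 3, 2)
//   smallLastBatch === false -> Math.floor(8 / 3) === 2 batches (3, 3)
```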

@@ -76,4 +183,4 @@ }

switch (_a.label) {
case 0: return [4, base.iterator()];
case 1: return [2, (_a.sent())
case 0: return [4 /*yield*/, base.iterator()];
case 1: return [2 /*return*/, (_a.sent())
.columnMajorBatch(batchSize, smallLastBatch, deepBatchConcat)];

@@ -84,2 +191,16 @@ }

};
/**
* Concatenates this `Dataset` with another.
*
* ```js
* const a = tf.data.array([1, 2, 3]);
* const b = tf.data.array([4, 5, 6]);
* const c = a.concatenate(b);
* await c.forEach(e => console.log(e));
* ```
*
* @param dataset A `Dataset` to be concatenated onto this one.
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.concatenate = function (dataset) {

@@ -90,8 +211,14 @@ var _this = this;

if (this.size === Infinity || dataset.size === Infinity) {
// If either of the two datasets has infinite size, the new size is
// infinity.
size = Infinity;
}
else if (this.size != null && dataset.size != null) {
// If the sizes of both datasets are known and finite, the new size is
// the sum of the two sizes.
size = this.size + dataset.size;
}
else {
// If neither dataset has infinite size but either size is null, the
// new size is null.
size = null;

@@ -101,10 +228,25 @@ }

switch (_c.label) {
case 0: return [4, base.iterator()];
case 0: return [4 /*yield*/, base.iterator()];
case 1:
_b = (_a = (_c.sent())).concatenate;
return [4, dataset.iterator()];
case 2: return [2, _b.apply(_a, [_c.sent()])];
return [4 /*yield*/, dataset.iterator()];
case 2: return [2 /*return*/, _b.apply(_a, [_c.sent()])];
}
}); }); }, size);
};
/**
* Filters this dataset according to `predicate`.
*
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
* .filter(x => x%2 === 0);
* await a.forEach(e => console.log(e));
* ```
*
* @param predicate A function mapping a dataset element to a boolean or a
* `Promise` for one.
*
* @returns A `Dataset` of elements for which the predicate was true.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.filter = function (predicate) {

@@ -115,5 +257,8 @@ var _this = this;

if (this.size === Infinity) {
// If the size of this dataset is infinity, the new size is infinity.
size = Infinity;
}
else {
// If this dataset has a finite number of elements, the new size is null
// because an unknown number of elements may be filtered out.
size = null;

@@ -124,4 +269,4 @@ }

switch (_a.label) {
case 0: return [4, base.iterator()];
case 1: return [2, (_a.sent()).filter(function (x) { return tf.tidy(function () { return predicate(x); }); })];
case 0: return [4 /*yield*/, base.iterator()];
case 1: return [2 /*return*/, (_a.sent()).filter(function (x) { return tf.tidy(function () { return predicate(x); }); })];
}

@@ -131,2 +276,17 @@ });

};
/**
* Apply a function to every element of the dataset.
*
* After the function is applied to a dataset element, any Tensors contained
* within that element are disposed.
*
* ```js
* const a = tf.data.array([1, 2, 3]);
* await a.forEach(e => console.log(e));
* ```
*
* @param f A function to apply to each dataset element.
* @returns A `Promise` that resolves after all elements have been processed.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.forEach = function (f) {

@@ -136,4 +296,4 @@ return __awaiter(this, void 0, void 0, function () {

switch (_a.label) {
case 0: return [4, this.iterator()];
case 1: return [2, (_a.sent()).forEach(f)];
case 0: return [4 /*yield*/, this.iterator()];
case 1: return [2 /*return*/, (_a.sent()).forEach(f)];
}

@@ -143,2 +303,16 @@ });

};
/**
* Maps this dataset through a 1-to-1 transform.
*
* ```js
* const a = tf.data.array([1, 2, 3]).map(x => x*x);
* await a.forEach(e => console.log(e));
* ```
*
* @param transform A function mapping a dataset element to a transformed
* dataset element.
*
* @returns A `Dataset` of transformed elements.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.map = function (transform) {

@@ -150,4 +324,4 @@ var _this = this;

switch (_a.label) {
case 0: return [4, base.iterator()];
case 1: return [2, (_a.sent()).map(function (x) { return tf.tidy(function () { return transform(x); }); })];
case 0: return [4 /*yield*/, base.iterator()];
case 1: return [2 /*return*/, (_a.sent()).map(function (x) { return tf.tidy(function () { return transform(x); }); })];
}

@@ -157,2 +331,23 @@ });

};
/**
* Maps this dataset through an async 1-to-1 transform.
*
* ```js
* const a = tf.data.array([1, 2, 3]).mapAsync(x => new Promise(function(resolve){
* resolve(x*x);
* }));
* await a.forEach(e => console.log(e));
* ```
*
* @param transform A function mapping a dataset element to a `Promise` for a
* transformed dataset element. This transform is responsible for disposing
* any intermediate `Tensor`s, e.g. by wrapping its computation in
* `tf.tidy()`; that cannot be automated here (as it is in the synchronous
* `map()` case).
*
* @returns A `Dataset` of transformed elements.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.mapAsync = function (transform) {

@@ -164,4 +359,4 @@ var _this = this;

switch (_a.label) {
case 0: return [4, base.iterator()];
case 1: return [2, (_a.sent()).mapAsync(transform)];
case 0: return [4 /*yield*/, base.iterator()];
case 1: return [2 /*return*/, (_a.sent()).mapAsync(transform)];
}

@@ -171,2 +366,11 @@ });

};
/**
* Creates a `Dataset` that prefetches elements from this dataset.
*
* @param bufferSize: An integer specifying the number of elements to be
* prefetched.
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
// TODO: Document this function once tfjs-data supports streaming.
Dataset.prototype.prefetch = function (bufferSize) {

@@ -177,7 +381,24 @@ var _this = this;

switch (_a.label) {
case 0: return [4, base.iterator()];
case 1: return [2, (_a.sent()).prefetch(bufferSize)];
case 0: return [4 /*yield*/, base.iterator()];
case 1: return [2 /*return*/, (_a.sent()).prefetch(bufferSize)];
}
}); }); }, this.size);
};
/**
* Repeats this dataset `count` times.
*
* NOTE: If this dataset is a function of global state (e.g. a random number
* generator), then different repetitions may produce different elements.
*
* ```js
* const a = tf.data.array([1, 2, 3]).repeat(3);
* await a.forEach(e => console.log(e));
* ```
*
* @param count: (Optional) An integer, representing the number of times
* the dataset should be repeated. The default behavior (if `count` is
* `undefined` or negative) is for the dataset to be repeated indefinitely.
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.repeat = function (count) {

@@ -188,11 +409,18 @@ var _this = this;

if (this.size != null && count > 0) {
// If this dataset has a known size and count is positive, the new size
// is the current size multiplied by count. This also covers the case
// where the current size is infinity.
size = this.size * count;
}
else if (count === 0) {
// If count is 0, the new size is 0.
size = 0;
}
else if (this.size != null && (count === undefined || count < 0)) {
// If this dataset has a known size and count is undefined or negative,
// the dataset is repeated indefinitely and the new size is infinity.
size = Infinity;
}
else {
// If the size of this dataset is null, the new dataset's size is null.
size = null;

@@ -208,10 +436,26 @@ }

_a = {};
return [4, base.iterator()];
case 1: return [2, (_a.value = _b.sent(), _a.done = false, _a)];
return [4 /*yield*/, base.iterator()];
case 1: return [2 /*return*/, (_a.value = _b.sent(), _a.done = false, _a)];
}
}); }); });
return [2, lazy_iterator_1.iteratorFromConcatenated(iteratorIterator.take(count))];
return [2 /*return*/, lazy_iterator_1.iteratorFromConcatenated(iteratorIterator.take(count))];
});
}); }, size);
};
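Worked instances of the size rules above, for illustration:

```js
// For a dataset of known size 3:
//   repeat(2)  -> size 6
//   repeat(0)  -> size 0
//   repeat()   -> size Infinity (repeats indefinitely)
// For a dataset of unknown (null) size, the result size is null.
```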
/**
* Creates a `Dataset` that skips `count` initial elements from this dataset.
*
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6]).skip(3);
* await a.forEach(e => console.log(e));
* ```
*
* @param count: The number of elements of this dataset that should be skipped
* to form the new dataset. If `count` is greater than the size of this
* dataset, the new dataset will contain no elements. If `count`
* is `undefined` or negative, skips the entire dataset.
*
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.skip = function (count) {

@@ -222,2 +466,5 @@ var _this = this;

if (this.size != null && count >= 0 && this.size >= count) {
// If the size of this dataset is at least count, the new dataset's size
// is the current size minus the skipped count. This also covers the case
// where the current size is infinity.
size = this.size - count;

@@ -227,5 +474,8 @@ }

(this.size < count || count === undefined || count < 0)) {
// If the size of this dataset is smaller than count, or count is
// undefined or negative, the entire dataset is skipped and the new size
// is 0.
size = 0;
}
else {
// If the size of this dataset is null, the new dataset's size is null.
size = null;

@@ -235,7 +485,28 @@ }

switch (_a.label) {
case 0: return [4, base.iterator()];
case 1: return [2, (_a.sent()).skip(count)];
case 0: return [4 /*yield*/, base.iterator()];
case 1: return [2 /*return*/, (_a.sent()).skip(count)];
}
}); }); }, size);
};
// TODO(soergel): deep sharded shuffle, where supported
/**
* Pseudorandomly shuffles the elements of this dataset. This is done in a
* streaming manner, by sampling from a given number of prefetched elements.
*
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6]).shuffle(3);
* await a.forEach(e => console.log(e));
* ```
*
* @param bufferSize: An integer specifying the number of elements from this
* dataset from which the new dataset will sample.
* @param seed: (Optional) An integer specifying the random seed that will
* be used to create the distribution.
* @param reshuffleEachIteration: (Optional) A boolean, which if true
* indicates that the dataset should be pseudorandomly reshuffled each time
* it is iterated over. If false, elements will be returned in the same
* shuffled order on each iteration. (Defaults to `true`.)
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.shuffle = function (bufferSize, seed, reshuffleEachIteration) {

@@ -255,4 +526,4 @@ var _this = this;

}
return [4, base.iterator()];
case 1: return [2, (_a.sent()).shuffle(bufferSize, seed2.toString())];
return [4 /*yield*/, base.iterator()];
case 1: return [2 /*return*/, (_a.sent()).shuffle(bufferSize, seed2.toString())];
}

@@ -262,2 +533,18 @@ });

};
/**
* Creates a `Dataset` with at most `count` initial elements from this
* dataset.
*
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6]).take(3);
* await a.forEach(e => console.log(e));
* ```
*
* @param count: The number of elements of this dataset that should be taken
* to form the new dataset. If `count` is `undefined` or negative, or if
* `count` is greater than the size of this dataset, the new dataset will
* contain all elements of this dataset.
* @returns A `Dataset`.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.take = function (count) {

@@ -268,8 +555,13 @@ var _this = this;

if (this.size != null && this.size > count) {
// If the size of this dataset is greater than count, the new dataset's
// size is count.
size = count;
}
else if (this.size != null && this.size <= count) {
// If the size of this dataset is equal to or smaller than count, the new
// dataset's size is the size of this dataset.
size = this.size;
}
else {
// If the size of this dataset is null, the new dataset's size is null.
size = null;

@@ -279,7 +571,22 @@ }

switch (_a.label) {
case 0: return [4, base.iterator()];
case 1: return [2, (_a.sent()).take(count)];
case 0: return [4 /*yield*/, base.iterator()];
case 1: return [2 /*return*/, (_a.sent()).take(count)];
}
}); }); }, size);
};
/**
* Collect all elements of this dataset into an array.
*
* This will succeed only for small datasets that fit in memory. It is
* useful for testing, but should generally be avoided if possible.
*
* ```js
* const a = tf.data.array([1, 2, 3, 4, 5, 6]);
* console.log(await a.toArray());
* ```
*
* @returns A Promise for an array of elements, which will resolve
* when a new stream has been obtained and fully consumed.
*/
/** @doc {heading: 'Data', subheading: 'Classes'} */
Dataset.prototype.toArray = function () {

@@ -289,4 +596,4 @@ return __awaiter(this, void 0, void 0, function () {

switch (_a.label) {
case 0: return [4, this.iterator()];
case 1: return [2, (_a.sent()).collect()];
case 0: return [4 /*yield*/, this.iterator()];
case 1: return [2 /*return*/, (_a.sent()).collect()];
}

@@ -299,5 +606,17 @@ });

exports.Dataset = Dataset;
/**
* Create a `Dataset` defined by a provided iterator() function.
*
* ```js
* let i = -1;
* const func = () =>
* ++i < 5 ? {value: i, done: false} : {value: null, done: true};
* const iter = tf.data.iteratorFromFunction(func);
* const ds = tf.data.datasetFromIteratorFn(async () => iter);
* await ds.forEach(e => console.log(e));
* ```
*/
function datasetFromIteratorFn(iteratorFn, size) {
if (size === void 0) { size = null; }
return new (function (_super) {
return new /** @class */ (function (_super) {
__extends(class_1, _super);

@@ -309,6 +628,10 @@ function class_1() {

}
/*
* Provide a new stream of elements. Note this will also start new streams
* from any underlying `Dataset`s.
*/
class_1.prototype.iterator = function () {
return __awaiter(this, void 0, void 0, function () {
return __generator(this, function (_a) {
return [2, iteratorFn()];
return [2 /*return*/, iteratorFn()];
});

@@ -321,11 +644,69 @@ });

exports.datasetFromIteratorFn = datasetFromIteratorFn;
/**
* Create a `Dataset` from an array of elements.
*
* Create a Dataset from an array of objects:
* ```js
* const a = tf.data.array([{'item': 1}, {'item': 2}, {'item': 3}]);
* await a.forEach(e => console.log(e));
* ```
*
* Create a Dataset from an array of numbers:
* ```js
* const a = tf.data.array([4, 5, 6]);
* await a.forEach(e => console.log(e));
* ```
* @param items An array of elements that will be parsed as items in a dataset.
*/
/** @doc {heading: 'Data', subheading: 'Creation', namespace: 'data'} */
function array(items) {
var _this = this;
return datasetFromIteratorFn(function () { return __awaiter(_this, void 0, void 0, function () { return __generator(this, function (_a) {
return [2, lazy_iterator_1.iteratorFromItems(items)];
return [2 /*return*/, lazy_iterator_1.iteratorFromItems(items)];
}); }); }, items.length);
}
exports.array = array;
/**
* Create a `Dataset` by zipping together an array, dict, or nested
* structure of `Dataset`s (and perhaps additional constants).
* The underlying datasets must provide elements in a consistent order such that
* they correspond.
*
* The number of elements in the resulting dataset is the same as the size of
* the smallest dataset in datasets.
*
* The nested structure of the `datasets` argument determines the
* structure of elements in the resulting iterator.
*
* Note this means that, given an array of two datasets that produce dict
* elements, the result is a dataset that produces elements that are arrays
* of two dicts:
*
* Zip an array of datasets:
* ```js
* console.log('Zip two datasets of objects:');
* const ds1 = tf.data.array([{a: 1}, {a: 2}, {a: 3}]);
* const ds2 = tf.data.array([{b: 4}, {b: 5}, {b: 6}]);
* const ds3 = tf.data.zip([ds1, ds2]);
* await ds3.forEach(e => console.log(JSON.stringify(e)));
*
* // If the goal is to merge the dicts in order to produce elements like
* // {a: ..., b: ...}, this requires a second step such as:
* console.log('Merge the objects:');
* const ds4 = ds3.map(x => {return {a: x[0].a, b: x[1].b}});
* await ds4.forEach(e => console.log(e));
* ```
*
* Zip a dict of datasets:
* ```js
* const a = tf.data.array([{a: 1}, {a: 2}, {a: 3}]);
* const b = tf.data.array([{b: 4}, {b: 5}, {b: 6}]);
* const c = tf.data.zip({c: a, d: b});
* await c.forEach(e => console.log(JSON.stringify(e)));
* ```
*/
/** @doc {heading: 'Data', subheading: 'Operations', namespace: 'data'} */
function zip(datasets) {
var _this = this;
// manually type-check the argument for JS users
if (!deep_map_1.isIterable(datasets)) {

@@ -351,3 +732,3 @@ throw new Error('The argument to zip() must be an object or array.');

switch (_a.label) {
case 0: return [4, deep_map_1.deepMapAndAwaitAll(datasets, function (d) {
case 0: return [4 /*yield*/, deep_map_1.deepMapAndAwaitAll(datasets, function (d) {
if (d instanceof Dataset) {

@@ -366,3 +747,3 @@ return { value: d.iterator(), recurse: false };

streams = _a.sent();
return [2, lazy_iterator_1.iteratorFromZipped(streams, lazy_iterator_1.ZipMismatchMode.SHORTEST)];
return [2 /*return*/, lazy_iterator_1.iteratorFromZipped(streams, lazy_iterator_1.ZipMismatchMode.SHORTEST)];
}

@@ -373,2 +754,10 @@ });

exports.zip = zip;
/**
* A zip function for use with deepZip, passed via the columnMajorBatch call.
*
* Accepts an array of identically-structured nested elements and either batches
* them (if they are primitives, numeric arrays, or Tensors) or requests
* recursion (if not).
*/
// tslint:disable-next-line:no-any
function deepBatchConcat(rows) {

@@ -378,7 +767,11 @@ if (rows === null) {

}
// use the first item to decide whether to recurse or batch here.
var exampleRow = rows[0];
if (typeof (exampleRow) === 'string') {
// rows is an array of strings, so it's already 'batched'.
// TODO(soergel): clean up the string special case when Tensor supports it.
return { value: rows, recurse: false };
}
if (!deep_map_1.isIterable(exampleRow)) {
// rows is an array of non-string primitives or Tensors, so batch them.
var value = batchConcat(rows);

@@ -388,18 +781,28 @@ return { value: value, recurse: false };

if (deep_map_1.isNumericArray(exampleRow)) {
// interpret an array of numbers as a leaf, so batching produces a 2d Tensor
var value = batchConcat(rows);
return { value: value, recurse: false };
}
// the example row is itself iterable, but not numeric, so recurse into it.
return { value: null, recurse: true };
}
/**
* Assembles a list of same-shaped numbers, number arrays, or Tensors
* into a single new Tensor where axis 0 is the batch dimension.
*/
function batchConcat(arrays) {
if (arrays.length === 0) {
// We can't return an empty Tensor because we don't know the element shape.
throw new Error('Can\'t make a batch of zero elements.');
}
if (arrays[0] instanceof tf.Tensor) {
// Input is an array of Tensors
return tf.stack(arrays);
}
else if (Array.isArray(arrays[0])) {
// Input is an array of arrays of numbers
return batchConcatArrays(arrays);
}
else {
// Input is a simple array of numbers
var numbers = arrays;

@@ -410,2 +813,4 @@ return tf.Tensor.make([numbers.length], { values: new Float32Array(numbers) });

function batchConcatArrays(arrays) {
// Should we first make row Tensors and then use tf.stack() here too?
// Probably not: the extra Tensor allocations would outweigh any benefit.
var rowLength = arrays[0].length;

@@ -412,0 +817,0 @@ var batchShape = [arrays.length, arrays[0].length];
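Illustrative outcomes of the three `batchConcat` code paths above (shapes inferred from the branches; not output of the diff itself):

```js
// batchConcat over three kinds of input (conceptual):
//   [tensorA, tensorB] (each shape [2]) -> tf.stack -> Tensor of shape [2, 2]
//   [[1, 2], [3, 4]] (number arrays)    -> one Tensor of shape [2, 2]
//   [1, 2, 3] (plain numbers)           -> one Tensor of shape [3]
```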

@@ -0,1 +1,18 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { Dataset } from '../dataset';

@@ -6,2 +23,14 @@ import { DataSource } from '../datasource';

import { TextLineDataset } from './text_line_dataset';
/**
* Represents a potentially large collection of delimited text records.
*
* The produced `DataElement`s each contain one key-value pair for
* every column of the table. When a field is empty in the incoming data, the
* resulting value is `undefined`, or an error is thrown if the column is
* marked as required. Values that can be parsed as numbers are emitted as
* type `number`; other values are parsed as `string`.
*
* The results are not batched.
*/
/** @doc {heading: 'Data', subheading: 'Classes', namespace: 'data'} */
export declare class CSVDataset extends Dataset<DataElement> {

@@ -16,5 +45,46 @@ protected readonly input: DataSource;

private delimiter;
/**
* Returns the column names of the CSV dataset. If `configuredColumnsOnly`
* is true, returns the column names in `columnConfigs`. If
* `configuredColumnsOnly` is false and `columnNames` is provided, returns
* `columnNames`. If `configuredColumnsOnly` is false and `columnNames` is
* not provided, returns all column names parsed from the CSV file. For
* example usage see `tf.data.csv`.
/** @doc {heading: 'Data', subheading: 'Classes'} */
columnNames(): Promise<string[]>;
private setColumnNames;
private maybeReadHeaderLine;
/**
* Create a `CSVDataset`.
*
* @param input A `DataSource` providing a chunked, UTF8-encoded byte stream.
* @param csvConfig (Optional) A CSVConfig object that contains configurations
* of reading and decoding from CSV file(s).
*
* hasHeader: (Optional) A boolean value that indicates whether the first
* row of provided CSV file is a header line with column names, and should
* not be included in the data. Defaults to `true`.
*
* columnNames: (Optional) A list of strings that corresponds to
* the CSV column names, in order. If provided, it ignores the column
* names inferred from the header row. If not provided, infers the column
* names from the first row of the records. If hasHeader is false and
* columnNames is not provided, this method throws an error.
*
* columnConfigs: (Optional) A dictionary whose keys are column names and
* whose values are objects stating whether the column is required, its
* data type, its default value, and whether it is a label. If provided,
* keys must correspond to names provided in columnNames or inferred from
* the file header lines. If isLabel is true for any column, the dataset
* returns an array of two items: the first item is a dict of features
* key/value pairs, the second item is a dict of labels key/value pairs.
* If no column is marked as label, returns a dict of features only.
*
* configuredColumnsOnly (Optional) If true, only columns provided in
* columnConfigs will be parsed and provided during iteration.
*
* delimiter (Optional) The string used to parse each line of the input
* file. Defaults to `,`.
*/
constructor(input: DataSource, csvConfig?: CSVConfig);
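A hedged usage sketch of these options through `tf.data.csv`, which the docs above reference; the URL and the `species` column are placeholders:

```js
const csvDataset = tf.data.csv('https://example.com/flowers.csv', {
  hasHeader: true,
  columnConfigs: {
    species: {isLabel: true}  // placeholder column marked as the label
  },
  delimiter: ','
});
console.log(await csvDataset.columnNames());
// With a label column configured, each element is [features, labels].
await csvDataset.take(3).forEach(e => console.log(e));
```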

@@ -21,0 +91,0 @@ iterator(): Promise<LazyIterator<DataElement>>;

"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __extends = (this && this.__extends) || (function () {

@@ -57,4 +74,48 @@ var extendStatics = Object.setPrototypeOf ||

var STATE_WITHIN_QUOTE_IN_QUOTE = Symbol('quoteinquote');
var CSVDataset = (function (_super) {
/**
* Represents a potentially large collection of delimited text records.
*
* The produced `DataElement`s each contain one key-value pair for
* every column of the table. When a field is empty in the incoming data, the
* resulting value is `undefined`, or an error is thrown if the column is
* marked as required. Values that can be parsed as numbers are emitted as
* type `number`; other values are parsed as `string`.
*
* The results are not batched.
*/
/** @doc {heading: 'Data', subheading: 'Classes', namespace: 'data'} */
var CSVDataset = /** @class */ (function (_super) {
__extends(CSVDataset, _super);
/**
* Create a `CSVDataset`.
*
* @param input A `DataSource` providing a chunked, UTF8-encoded byte stream.
* @param csvConfig (Optional) A CSVConfig object that contains configurations
* of reading and decoding from CSV file(s).
*
* hasHeader: (Optional) A boolean value that indicates whether the first
* row of provided CSV file is a header line with column names, and should
* not be included in the data. Defaults to `true`.
*
* columnNames: (Optional) A list of strings that corresponds to
* the CSV column names, in order. If provided, it ignores the column
* names inferred from the header row. If not provided, infers the column
* names from the first row of the records. If hasHeader is false and
* columnNames is not provided, this method throws an error.
*
* columnConfigs: (Optional) A dictionary whose keys are column names and
* whose values are objects stating whether the column is required, its
* data type, its default value, and whether it is a label. If provided,
* keys must correspond to names provided in columnNames or inferred from
* the file header lines. If isLabel is true for any column, the dataset
* returns an array of two items: the first item is a dict of features
* key/value pairs, the second item is a dict of labels key/value pairs.
* If no column is marked as label, returns a dict of features only.
*
* configuredColumnsOnly (Optional) If true, only columns provided in
* columnConfigs will be parsed and provided during iteration.
*
* delimiter (Optional) The string used to parse each line of the input
* file. Defaults to `,`.
*/
function CSVDataset(input, csvConfig) {

@@ -80,2 +141,11 @@ var _this = _super.call(this) || this;

}
/**
* Returns the column names of the CSV dataset. If `configuredColumnsOnly`
* is true, returns the column names in `columnConfigs`. If
* `configuredColumnsOnly` is false and `columnNames` is provided, returns
* `columnNames`. If `configuredColumnsOnly` is false and `columnNames` is
* not provided, returns all column names parsed from the CSV file. For
* example usage see `tf.data.csv`.
/** @doc {heading: 'Data', subheading: 'Classes'} */
CSVDataset.prototype.columnNames = function () {

@@ -86,8 +156,8 @@ return __awaiter(this, void 0, void 0, function () {

case 0:
if (!!this.columnNamesValidated) return [3, 2];
return [4, this.setColumnNames()];
if (!!this.columnNamesValidated) return [3 /*break*/, 2];
return [4 /*yield*/, this.setColumnNames()];
case 1:
_a.sent();
_a.label = 2;
case 2: return [2, this.configuredColumnsOnly ? Object.keys(this.columnConfigs) :
case 2: return [2 /*return*/, this.configuredColumnsOnly ? Object.keys(this.columnConfigs) :
this.fullColumnNames];

@@ -98,2 +168,10 @@ }

};
/* 1) If `columnNames` is provided as string[], use this string[] as output
* keys in corresponding order. The length must match the number of inferred
* columns if `hasHeader` is true.
* 2) If `columnNames` is not provided, parse the header line as
* `columnNames` if `hasHeader` is true. If `hasHeader` is false, throw an
* error.
* 3) If `columnConfigs` is provided, all the keys in `columnConfigs` must
* exist in parsed `columnNames`.
*/
CSVDataset.prototype.setColumnNames = function () {

@@ -104,9 +182,11 @@ return __awaiter(this, void 0, void 0, function () {

switch (_b.label) {
case 0: return [4, this.maybeReadHeaderLine()];
case 0: return [4 /*yield*/, this.maybeReadHeaderLine()];
case 1:
columnNamesFromFile = _b.sent();
if (!this.fullColumnNames && !columnNamesFromFile) {
// Throw an error if columnNames is not provided and there is no header line.
throw new Error('Column names must be provided if there is no header line.');
}
else if (this.fullColumnNames && columnNamesFromFile) {
// Check provided columnNames match header line.
tfjs_core_1.util.assert(columnNamesFromFile.length === this.fullColumnNames.length, 'The length of provided columnNames (' +

@@ -126,2 +206,3 @@ this.fullColumnNames.length.toString() +

tfjs_core_1.util.assert(duplicateNames.length === 0, 'Duplicate column names found: ' + duplicateNames.toString());
// Check if keys in columnConfigs match columnNames.
if (this.columnConfigs) {

@@ -139,3 +220,3 @@ for (_i = 0, _a = Object.keys(this.columnConfigs); _i < _a.length; _i++) {

this.columnNamesValidated = true;
return [2];
return [2 /*return*/];
}

@@ -151,7 +232,7 @@ });

case 0:
if (!this.hasHeader) return [3, 3];
return [4, this.base.iterator()];
if (!this.hasHeader) return [3 /*break*/, 3];
return [4 /*yield*/, this.base.iterator()];
case 1:
iter = _a.sent();
return [4, iter.next()];
return [4 /*yield*/, iter.next()];
case 2:

@@ -163,4 +244,4 @@ firstElement = _a.sent();

firstLine = firstElement.value;
return [2, firstLine.split(this.delimiter)];
case 3: return [2, null];
return [2 /*return*/, firstLine.split(this.delimiter)];
case 3: return [2 /*return*/, null];
}

@@ -177,14 +258,16 @@ });

case 0:
if (!!this.columnNamesValidated) return [3, 2];
return [4, this.setColumnNames()];
if (!!this.columnNamesValidated) return [3 /*break*/, 2];
return [4 /*yield*/, this.setColumnNames()];
case 1:
_a.sent();
_a.label = 2;
case 2: return [4, this.base.iterator()];
case 2: return [4 /*yield*/, this.base.iterator()];
case 3:
lines = _a.sent();
if (this.hasHeader) {
// We previously read the first line to get the columnNames.
// Now that we're providing data, skip it.
lines = lines.skip(1);
}
return [2, lines.map(function (x) { return _this.makeDataElement(x); })];
return [2 /*return*/, lines.map(function (x) { return _this.makeDataElement(x); })];
}

@@ -202,2 +285,3 @@ });

if (this.configuredColumnsOnly && !config) {
// This column is not selected.
continue;

@@ -209,2 +293,4 @@ }

if (value === '') {
// If a default value is provided, use it; if not, the value is set to
// undefined.
if (config && config.default !== undefined) {

@@ -221,4 +307,7 @@ parsedValue = config.default;

else {
// A value is present, so parse it based on type
var valueAsNum = Number(value);
if (isNaN(valueAsNum)) {
// The value is a string and this column is declared as boolean in the
// config, so parse it as a boolean.
if (config && config.dtype === 'bool') {

@@ -228,2 +317,3 @@ parsedValue = this.getBoolean(value);

else {
// Set value as string
parsedValue = value;

@@ -233,5 +323,9 @@ }

else if (!config || !config.dtype) {
// If this value is a number and no type config is provided, return it
// as a number.
parsedValue = valueAsNum;
}
else {
// If this value is a number and data type is provided, parse it
// according to provided data type.
switch (config.dtype) {

@@ -252,2 +346,3 @@ case 'float32':

}
// Check whether this column is a label.
(config && config.isLabel) ? labels[key] = parsedValue :

@@ -257,2 +352,4 @@ features[key] = parsedValue;

}
// If label exists, return an array of features and labels, otherwise
// return features only.
if (Object.keys(labels).length === 0) {

@@ -273,2 +370,3 @@ return features;

};
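Conceptual outcomes implied by the parsing branches above (`makeDataElement` is private; values are illustrative):

```js
// makeDataElement value handling (conceptual):
//   ''     with config.default = 0 -> 0          // empty field, default used
//   ''     without a default       -> undefined  // empty field, no default
//   '3.5'                          -> 3.5        // parses as a number
//   'iris'                         -> 'iris'     // non-numeric stays a string
//   'true' with dtype 'bool'       -> parsed via getBoolean
```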
// adapted from https://beta.observablehq.com/@mbostock/streaming-csv
CSVDataset.prototype.parseRow = function (line) {

@@ -279,6 +377,9 @@ var result = [];

var currentState = STATE_FIELD;
// Go through the line character by character, tracking quote state.
for (var i = 0; i < readLength; i++) {
switch (currentState) {
// Before entering a new field
case STATE_OUT:
switch (line.charAt(i)) {
// Enter a quoted field
case CODE_QUOTE:

@@ -288,2 +389,3 @@ readOffset = i + 1;

break;
// Read an empty field
case this.delimiter:

@@ -294,2 +396,3 @@ result.push('');

break;
// Enter an unquoted field
default:

@@ -301,4 +404,6 @@ currentState = STATE_FIELD;

break;
// In an unquoted field
case STATE_FIELD:
switch (line.charAt(i)) {
// Exit an unquoted field, add it to result
case this.delimiter:

@@ -312,4 +417,6 @@ result.push(line.substring(readOffset, i));

break;
// In a quoted field
case STATE_QUOTE:
switch (line.charAt(i)) {
// Read a quote after a quote
case CODE_QUOTE:

@@ -321,4 +428,6 @@ currentState = STATE_QUOTE_AFTER_QUOTE;

break;
// This state means it's right after a second quote in a field
case STATE_QUOTE_AFTER_QUOTE:
switch (line.charAt(i)) {
// Finished a quoted field
case this.delimiter:

@@ -329,5 +438,7 @@ result.push(line.substring(readOffset, i - 1));

break;
// Finished a quoted part in a quoted field
case CODE_QUOTE:
currentState = STATE_QUOTE;
break;
// In a quoted part in a quoted field
default:

@@ -340,2 +451,3 @@ currentState = STATE_WITHIN_QUOTE_IN_QUOTE;

switch (line.charAt(i)) {
// Exit a quoted part in a quoted field
case CODE_QUOTE:

@@ -350,2 +462,3 @@ currentState = STATE_QUOTE;

}
// Add the last field, depending on whether it is quoted.
if (currentState === STATE_QUOTE_AFTER_QUOTE) {

@@ -362,2 +475,5 @@ result.push(line.substring(readOffset, readLength - 1));

exports.CSVDataset = CSVDataset;
// TODO(soergel): add more basic datasets for parity with tf.data
// tf.data.FixedLengthRecordDataset()
// tf.data.TFRecordDataset()
//# sourceMappingURL=csv_dataset.js.map
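Conceptual behavior of the quote-aware `parseRow` state machine above (a private method; the rows are illustrative):

```js
// parseRow('a,b,c')      -> ['a', 'b', 'c']
// parseRow('a,"b,c",d')  -> ['a', 'b,c', 'd']  // delimiter inside quotes preserved
// parseRow('a,,c')       -> ['a', '', 'c']     // empty field read as ''
```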

@@ -0,8 +1,35 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { Dataset } from '../dataset';
import { DataSource } from '../datasource';
import { LazyIterator } from '../iterators/lazy_iterator';
/**
* Represents a potentially large collection of text lines.
*
* The results are not batched.
*/
export declare class TextLineDataset extends Dataset<string> {
protected readonly input: DataSource;
/**
* Create a `TextLineDataset`.
*
* @param input A `DataSource` providing a chunked, UTF8-encoded byte stream.
*/
constructor(input: DataSource);
iterator(): Promise<LazyIterator<string>>;
}
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __extends = (this && this.__extends) || (function () {

@@ -49,4 +66,14 @@ var extendStatics = Object.setPrototypeOf ||

var dataset_1 = require("../dataset");
var TextLineDataset = (function (_super) {
/**
* Represents a potentially large collection of text lines.
*
* The results are not batched.
*/
var TextLineDataset = /** @class */ (function (_super) {
__extends(TextLineDataset, _super);
/**
* Create a `TextLineDataset`.
*
* @param input A `DataSource` providing a chunked, UTF8-encoded byte stream.
*/
function TextLineDataset(input) {

@@ -62,3 +89,3 @@ var _this = _super.call(this) || this;

switch (_a.label) {
case 0: return [4, this.input.iterator()];
case 0: return [4 /*yield*/, this.input.iterator()];
case 1:

@@ -68,3 +95,3 @@ inputIterator = _a.sent();

lineIterator = utf8Iterator.split('\n');
return [2, lineIterator];
return [2 /*return*/, lineIterator];
}

@@ -71,0 +98,0 @@ });

@@ -0,4 +1,34 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { ByteChunkIterator } from './iterators/byte_chunk_iterator';
/**
* Represents a data source readable as a stream of binary data chunks.
*
* Because `Dataset`s can be read repeatedly (via `Dataset.iterator()`), this
* provides a means to repeatedly create streams from the underlying data
* sources.
*/
export declare abstract class DataSource {
/**
* Obtain a new stream of binary data chunks.
*
* Starts the new stream from the beginning of the data source, even if other
* streams have been obtained previously.
*/
abstract iterator(): Promise<ByteChunkIterator>;
}
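The same repeatable-stream contract can be sketched at the `Dataset` level with the documented `datasetFromIteratorFn` and `iteratorFromFunction`:

```js
const ds = tf.data.datasetFromIteratorFn(async () => {
  let i = -1;  // fresh state for each new stream
  return tf.data.iteratorFromFunction(
      () => ++i < 3 ? {value: i, done: false} : {value: null, done: true});
});
await ds.forEach(e => console.log(e));  // 0, 1, 2
await ds.forEach(e => console.log(e));  // 0, 1, 2 again -- a new stream
```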
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
Object.defineProperty(exports, "__esModule", { value: true });
var DataSource = (function () {
/**
* Represents a data source readable as a stream of binary data chunks.
*
* Because `Dataset`s can be read repeatedly (via `Dataset.iterator()`), this
* provides a means to repeatedly create streams from the underlying data
* sources.
*/
var DataSource = /** @class */ (function () {
function DataSource() {

@@ -9,2 +33,5 @@ }

exports.DataSource = DataSource;
// TODO(soergel): consider convenience factory functions here
// in combination with chainable source->dataset above, e.g.:
// tf.data.url(...).asCsvDataset().shuffle().batch()
//# sourceMappingURL=datasource.js.map

@@ -0,1 +1,17 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
export { array, Dataset, zip } from './dataset';

@@ -2,0 +18,0 @@ export { CSVDataset } from './datasets/csv_dataset';

"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
Object.defineProperty(exports, "__esModule", { value: true });

@@ -3,0 +19,0 @@ var dataset_1 = require("./dataset");

@@ -0,5 +1,31 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { LazyIterator } from './lazy_iterator';
import { StringIterator } from './string_iterator';
export declare abstract class ByteChunkIterator extends LazyIterator<Uint8Array> {
/**
* Decode a stream of UTF8-encoded byte arrays to a stream of strings.
*
 * The byte arrays produced by the ByteChunkIterator on which this is
* called will be interpreted as concatenated. No assumptions are made about
* the boundaries of the incoming chunks, so a multi-byte UTF8 encoding of a
* character may span the boundary between chunks. This naturally happens,
* for instance, when reading fixed-size byte arrays from a file.
*/
decodeUTF8(): StringIterator;
}
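A usage sketch, assuming `source` is any concrete `DataSource` backed by UTF8 text; `split('\n')` is the same `StringIterator` method that `TextLineDataset` uses above:
```js
const byteIterator = await source.iterator();      // ByteChunkIterator
const stringIterator = byteIterator.decodeUTF8();  // StringIterator
const lineIterator = stringIterator.split('\n');   // one string per line
await lineIterator.forEach(line => console.log(line));
```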
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __extends = (this && this.__extends) || (function () {

@@ -51,3 +68,3 @@ var extendStatics = Object.setPrototypeOf ||

var string_iterator_1 = require("./string_iterator");
var ByteChunkIterator = (function (_super) {
var ByteChunkIterator = /** @class */ (function (_super) {
__extends(ByteChunkIterator, _super);

@@ -57,2 +74,11 @@ function ByteChunkIterator() {

}
/**
* Decode a stream of UTF8-encoded byte arrays to a stream of strings.
*
 * The byte arrays produced by the ByteChunkIterator on which this is
* called will be interpreted as concatenated. No assumptions are made about
* the boundaries of the incoming chunks, so a multi-byte UTF8 encoding of a
* character may span the boundary between chunks. This naturally happens,
* for instance, when reading fixed-size byte arrays from a file.
*/
ByteChunkIterator.prototype.decodeUTF8 = function () {

@@ -64,3 +90,12 @@ return new Utf8Iterator(this);

exports.ByteChunkIterator = ByteChunkIterator;
var Utf8Iterator = (function (_super) {
// ============================================================================
// The following private classes serve to implement the chainable methods
// on ByteChunkIterator. Unfortunately they can't be placed in separate files,
// due to resulting trouble with circular imports.
// ============================================================================
// We wanted multiple inheritance, e.g.
// class Utf8Iterator extends QueueIterator<string>, StringIterator
// but the TypeScript mixin approach is a bit hacky, so we take this adapter
// approach instead.
var Utf8Iterator = /** @class */ (function (_super) {
__extends(Utf8Iterator, _super);

@@ -79,3 +114,3 @@ function Utf8Iterator(upstream) {

return __generator(this, function (_a) {
return [2, this.impl.next()];
return [2 /*return*/, this.impl.next()];
});

@@ -86,3 +121,25 @@ });

}(string_iterator_1.StringIterator));
var Utf8IteratorImpl = (function (_super) {
/**
* Decode a stream of UTF8-encoded byte arrays to a stream of strings.
*
* This is tricky because the incoming byte array boundaries may disrupt a
* multi-byte UTF8 character. Thus any incomplete character data at the end of
* a chunk must be carried over and prepended to the next chunk before
 * decoding. Fortunately, the native decoders (TextDecoder in the browser
 * and string_decoder in Node.js) handle byte-array boundaries automatically.
*
* In the context of an input pipeline for machine learning, UTF8 decoding is
* needed to parse text files containing training examples or prediction
* requests (e.g., formatted as CSV or JSON). We cannot use the built-in
* decoding provided by FileReader.readAsText() because here we are in a
* streaming context, which FileReader does not support.
*
* @param upstream A `LazyIterator` of `Uint8Arrays` containing UTF8-encoded
* text, which should be interpreted as concatenated. No assumptions are
* made about the boundaries of the incoming chunks, so a multi-byte UTF8
* encoding of a character may span the boundary between chunks. This
* naturally happens, for instance, when reading fixed-size byte arrays from a
* file.
*/
var Utf8IteratorImpl = /** @class */ (function (_super) {
__extends(Utf8IteratorImpl, _super);

@@ -96,2 +153,3 @@ function Utf8IteratorImpl(upstream) {

else {
// tslint:disable-next-line:no-require-imports
var StringDecoder = require('string_decoder').StringDecoder;

@@ -110,7 +168,7 @@ _this.decoder = new StringDecoder('utf8');

switch (_a.label) {
case 0: return [4, this.upstream.next()];
case 0: return [4 /*yield*/, this.upstream.next()];
case 1:
chunkResult = _a.sent();
if (chunkResult.done) {
return [2, false];
return [2 /*return*/, false];
}

@@ -127,3 +185,3 @@ else {

this.outputQueue.push(text);
return [2, true];
return [2 /*return*/, true];
}

@@ -130,0 +188,0 @@ });

@@ -0,7 +1,33 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { FileElement } from '../types';
import { ByteChunkIterator } from './byte_chunk_iterator';
export interface FileChunkIteratorOptions {
/** The byte offset at which to begin reading the File or Blob. Default 0. */
offset?: number;
/** The number of bytes to read at a time. Default 1MB. */
chunkSize?: number;
}
/**
* Provide a stream of chunks from a File, Blob, or Uint8Array.
* @param file The source File, Blob or Uint8Array.
* @param options Optional settings controlling file reading.
* @returns a lazy Iterator of Uint8Arrays containing sequential chunks of the
* input File, Blob or Uint8Array.
*/
export declare class FileChunkIterator extends ByteChunkIterator {

@@ -8,0 +34,0 @@ protected file: FileElement;
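A usage sketch, assuming a browser `File` object named `file`: read it in 1MB chunks and count the bytes.
```js
const chunks = new FileChunkIterator(file, {offset: 0, chunkSize: 1024 * 1024});
let totalBytes = 0;
await chunks.forEach(chunk => {
  totalBytes += chunk.byteLength;  // each chunk is a Uint8Array
});
console.log('read ' + totalBytes + ' bytes');
```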

"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __extends = (this && this.__extends) || (function () {

@@ -48,5 +65,13 @@ var extendStatics = Object.setPrototypeOf ||

Object.defineProperty(exports, "__esModule", { value: true });
// inspired by https://github.com/maxogden/filereader-stream
var tfjs_core_1 = require("@tensorflow/tfjs-core");
var byte_chunk_iterator_1 = require("./byte_chunk_iterator");
var FileChunkIterator = (function (_super) {
/**
* Provide a stream of chunks from a File, Blob, or Uint8Array.
* @param file The source File, Blob or Uint8Array.
* @param options Optional settings controlling file reading.
* @returns a lazy Iterator of Uint8Arrays containing sequential chunks of the
* input File, Blob or Uint8Array.
*/
var FileChunkIterator = /** @class */ (function (_super) {
__extends(FileChunkIterator, _super);

@@ -63,2 +88,3 @@ function FileChunkIterator(file, options) {

_this.offset = options.offset || 0;
// default 1MB chunk has tolerable perf on large files
_this.chunkSize = options.chunkSize || 1024 * 1024;

@@ -80,3 +106,3 @@ return _this;

this.file.size)) {
return [2, { value: null, done: true }];
return [2 /*return*/, { value: null, done: true }];
}

@@ -86,8 +112,16 @@ chunk = new Promise(function (resolve, reject) {

if (_this.file instanceof Uint8Array) {
// Note if end > this.file.byteLength, we just get a small last
// chunk.
resolve(new Uint8Array(_this.file.slice(_this.offset, end)));
}
else {
// This branch assumes that this.file type is File or Blob, which
// means it is in the browser environment.
// TODO(soergel): is this a performance issue?
var fileReader_1 = new FileReader();
fileReader_1.onload = function (event) {
var data = fileReader_1.result;
// Not sure we can trust the return type of
// FileReader.readAsArrayBuffer. See e.g.
// https://github.com/node-file-api/FileReader/issues/2
if (data instanceof ArrayBuffer) {

@@ -107,3 +141,7 @@ data = new Uint8Array(data);

};
// TODO(soergel): better handle onabort, onerror
// Note if end > this.file.size, we just get a small last chunk.
var slice = _this.file.slice(_this.offset, end);
// We can't use readAsText here (even if we know the file is text)
// because the slice boundary may fall within a multi-byte character.
fileReader_1.readAsArrayBuffer(slice);

@@ -114,4 +152,4 @@ }

_a = {};
return [4, chunk];
case 1: return [2, (_a.value = (_b.sent()), _a.done = false, _a)];
return [4 /*yield*/, chunk];
case 1: return [2 /*return*/, (_a.value = (_b.sent()), _a.done = false, _a)];
}

@@ -118,0 +156,0 @@ });

@@ -0,33 +1,331 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { DataElement, IteratorContainer } from '../types';
import { DeepMapResult } from '../util/deep_map';
import { RingBuffer } from '../util/ring_buffer';
/**
* Create a `LazyIterator` from an array of items.
*/
export declare function iteratorFromItems<T>(items: T[]): LazyIterator<T>;
/**
* Create a `LazyIterator` of incrementing integers.
*/
export declare function iteratorFromIncrementing(start: number): LazyIterator<number>;
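For illustration, a sketch of both factories, assuming they are exposed on `tf.data` the same way as `iteratorFromFunction` below:
```js
const items = tf.data.iteratorFromItems(['a', 'b', 'c']);
await items.forEach(e => console.log(e));  // a, b, c

// An unbounded stream of incrementing integers; draw elements manually.
const counter = tf.data.iteratorFromIncrementing(10);
console.log((await counter.next()).value);  // 10
console.log((await counter.next()).value);  // 11
```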
/**
* Create a `LazyIterator` from a function.
*
* ```js
* let i = -1;
* const func = () =>
* ++i < 5 ? {value: i, done: false} : {value: null, done: true};
* const iter = tf.data.iteratorFromFunction(func);
* await iter.forEach(e => console.log(e));
* ```
*
* @param func A function that produces data on each call.
*/
export declare function iteratorFromFunction<T>(func: () => IteratorResult<T> | Promise<IteratorResult<T>>): LazyIterator<T>;
/**
* Create a `LazyIterator` by concatenating underlying streams, which are
* themselves provided as a stream.
*
* This can also be thought of as a "stream flatten" operation.
*
* @param baseIterators A stream of streams to be concatenated.
* @param baseErrorHandler An optional function that can intercept `Error`s
* raised during a `next()` call on the base stream. This function can decide
* whether the error should be propagated, whether the error should be
* ignored, or whether the base stream should be terminated.
*/
export declare function iteratorFromConcatenated<T>(baseIterators: LazyIterator<LazyIterator<T>>, baseErrorHandler?: (e: Error) => boolean): LazyIterator<T>;
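A sketch of the flatten behavior (assuming the `tf.data` exposure used above):
```js
// Two small streams, themselves provided as a stream.
const streams = tf.data.iteratorFromItems([
  tf.data.iteratorFromItems([1, 2]),
  tf.data.iteratorFromItems([3, 4]),
]);
const flattened = tf.data.iteratorFromConcatenated(streams);
await flattened.forEach(e => console.log(e));  // 1, 2, 3, 4
```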
/**
* Create a `LazyIterator` by concatenating streams produced by calling a
* stream-generating function a given number of times.
*
* Since a `LazyIterator` is read-once, it cannot be repeated, but this
* function can be used to achieve a similar effect:
*
 *   iteratorFromConcatenatedFunction(
 *       () => ({value: new MyIterator(), done: false}), 6);
*
* @param iteratorFunc: A function that produces a new stream on each call.
* @param count: The number of times to call the function.
* @param baseErrorHandler An optional function that can intercept `Error`s
* raised during a `next()` call on the base stream. This function can decide
* whether the error should be propagated, whether the error should be
* ignored, or whether the base stream should be terminated.
*/
export declare function iteratorFromConcatenatedFunction<T>(iteratorFunc: () => IteratorResult<LazyIterator<T>>, count: number, baseErrorHandler?: (e: Error) => boolean): LazyIterator<T>;
/**
* Create a `LazyIterator` by zipping together an array, dict, or nested
* structure of `LazyIterator`s (and perhaps additional constants).
*
* The underlying streams must provide elements in a consistent order such
* that they correspond.
*
* Typically, the underlying streams should have the same number of
* elements. If they do not, the behavior is determined by the
* `mismatchMode` argument.
*
* The nested structure of the `iterators` argument determines the
* structure of elements in the resulting iterator.
*
* @param iterators: An array or object containing LazyIterators at the
* leaves.
* @param mismatchMode: Determines what to do when one underlying iterator
* is exhausted before the others. `ZipMismatchMode.FAIL` (the default)
* causes an error to be thrown in this case. `ZipMismatchMode.SHORTEST`
 * causes the zipped iterator to terminate when the first underlying stream
 * is exhausted, so elements remaining on the longer streams are ignored.
* `ZipMismatchMode.LONGEST` causes the zipped stream to continue, filling
* in nulls for the exhausted streams, until all streams are exhausted.
*/
export declare function iteratorFromZipped<O extends DataElement>(iterators: IteratorContainer, mismatchMode?: ZipMismatchMode): LazyIterator<O>;
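A sketch of zipping two corresponding streams (same `tf.data` assumption):
```js
const xs = tf.data.iteratorFromItems([1, 2, 3]);
const ys = tf.data.iteratorFromItems(['a', 'b', 'c']);
// The {x, y} structure of the argument determines the element structure.
const zipped = tf.data.iteratorFromZipped({x: xs, y: ys});
await zipped.forEach(e => console.log(e));  // {x: 1, y: 'a'}, ...
```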
/**
* An asynchronous iterator, providing lazy access to a potentially
* unbounded stream of elements.
*
 * An iterator can be obtained from a dataset:
* `const iter = await dataset.iterator();`
*/
export declare abstract class LazyIterator<T> {
abstract summary(): string;
/**
* Returns a `Promise` for the next element in the stream.
*
* When an item can be provided successfully, the return value is
* `{value:T, done:false}`.
*
* Calling next() on a closed stream returns `{value:null, done:true}`.
*/
abstract next(): Promise<IteratorResult<T>>;
/**
* Collect all remaining elements of a bounded stream into an array.
* Obviously this will succeed only for small streams that fit in memory.
* Useful for testing.
*
* @param maxItems the maximum number of items to return. If the stream
* terminates, fewer items will be returned. (default 1000)
* @param prefetch the size of the prefetch buffer to use when collecting
* items. Some amount of prefetch is important to test parallel streams,
* i.e. with multiple Promises outstanding. Without prefetch, this method
* makes purely serial next() calls.
*
* @returns A Promise for an array of stream elements, which will resolve
* when the stream is exhausted.
*/
collect(maxItems?: number, prefetch?: number): Promise<T[]>;
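For example (a sketch, reusing the assumed `iteratorFromItems` helper):
```js
const iter = tf.data.iteratorFromItems([1, 2, 3]).map(x => x * 2);
const result = await iter.collect();  // drains the bounded stream into memory
console.log(result);                  // [2, 4, 6]
```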
/**
* Draw items from the stream until it is exhausted.
*
* This can be useful when the stream has side effects but no output. In
* that case, calling this function guarantees that the stream will be
* fully processed.
*/
resolveFully(): Promise<void>;
/**
* Draw items from the stream until it is exhausted, or a predicate fails.
*
* This can be useful when the stream has side effects but no output. In
* that case, calling this function guarantees that the stream will be
 * processed until the predicate first fails or the stream is exhausted.
*/
resolveWhile(predicate: (r: T) => boolean): Promise<void>;
/**
* Handles errors thrown on this stream using a provided handler function.
*
* @param handler A function that handles any `Error` thrown during a `next()`
* call and returns true if the stream should continue (dropping the failed
* call) or false if the stream should quietly terminate. If the handler
* itself throws (or rethrows) an `Error`, that will be propagated.
*
* @returns A `LazyIterator` of elements passed through from upstream,
* possibly filtering or terminating on upstream `next()` calls that
* throw an `Error`.
*/
handleErrors(handler: (error: Error) => boolean): LazyIterator<T>;
/**
* Filters this stream according to `predicate`.
*
* @param predicate A function mapping a stream element to a boolean or a
* `Promise` for one.
*
* @returns A `LazyIterator` of elements for which the predicate was true.
*/
filter(predicate: (value: T) => boolean): LazyIterator<T>;
/**
* Maps this stream through a 1-to-1 transform.
*
* @param transform A function mapping a stream element to a transformed
* element.
*
* @returns A `LazyIterator` of transformed elements.
*/
map<O>(transform: (value: T) => O): LazyIterator<O>;
/**
* Maps this stream through an async 1-to-1 transform.
*
* @param transform A function mapping a stream element to a `Promise` for a
* transformed stream element.
*
* @returns A `LazyIterator` of transformed elements.
*/
mapAsync<O>(transform: (value: T) => Promise<O>): LazyIterator<O>;
/**
* Maps this stream through a 1-to-1 transform, forcing serial execution.
*
 * @param transform A function mapping a stream element to a `Promise` for a
 * transformed element.
*
* @returns A `LazyIterator` of transformed elements.
*/
serialMapAsync<O>(transform: (value: T) => Promise<O>): LazyIterator<O>;
/**
* Maps this stream through a 1-to-many transform.
*
* @param transform A function mapping a stream element to an array of
* transformed elements.
*
 * @returns A `LazyIterator` of transformed elements.
*/
flatmap<O>(transform: (value: T) => O[]): LazyIterator<O>;
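A sketch chaining the transforms above (same assumptions):
```js
const iter = tf.data.iteratorFromItems([1, 2, 3, 4])
    .filter(x => x % 2 === 0)   // keep 2, 4
    .map(x => x * 10)           // 20, 40
    .flatmap(x => [x, x + 1]);  // 20, 21, 40, 41
await iter.forEach(e => console.log(e));
```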
/**
* Apply a function to every element of the stream.
*
* @param f A function to apply to each stream element.
*/
forEach(f: (value: T) => void): Promise<void>;
/**
* Apply a function to every element of the stream, forcing serial execution.
*
* @param f A function to apply to each stream element. Should return 'true'
* to indicate that the stream should continue, or 'false' to cause it to
* terminate.
*/
serialForEach(f: (value: T) => Promise<boolean>): Promise<void>;
/**
* Groups elements into batches, represented as arrays of elements.
*
* We can think of the elements of this iterator as 'rows' (even if they are
* nested structures). By the same token, consecutive values for a given
* key within the elements form a 'column'. This matches the usual sense of
* 'row' and 'column' when processing tabular data (e.g., parsing a CSV).
*
* Thus, "Row-major" means that the resulting batch is simply a collection of
* rows: `[row1, row2, row3, ...]`. This is contrast to the column-major
* form, which is needed for vectorized computation.
*
* @param batchSize The number of elements desired per batch.
* @param smallLastBatch Whether to emit the final batch when it has fewer
* than batchSize elements. Default true.
* @returns A `LazyIterator` of batches of elements, represented as arrays
* of the original element type.
*/
rowMajorBatch(batchSize: number, smallLastBatch?: boolean): LazyIterator<T[]>;
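For instance (sketch):
```js
const batches = tf.data.iteratorFromItems([1, 2, 3, 4, 5]).rowMajorBatch(2);
// With smallLastBatch left at its default of true, the final short batch
// is emitted as well.
await batches.forEach(b => console.log(b));  // [1, 2], [3, 4], [5]
```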
/**
* Groups elements into batches, represented in column-major form.
*
* We can think of the elements of this iterator as 'rows' (even if they are
* nested structures). By the same token, consecutive values for a given
* key within the elements form a 'column'. This matches the usual sense of
* 'row' and 'column' when processing tabular data (e.g., parsing a CSV).
*
* Thus, "column-major" means that the resulting batch is a (potentially
* nested) structure representing the columns. Each column entry, then,
* contains a collection of the values found in that column for a range of
* input elements. This representation allows for vectorized computation, in
* contrast to the row-major form.
*
* The inputs should all have the same nested structure (i.e., of arrays and
* dicts). The result is a single object with the same nested structure,
* where the leaves are arrays collecting the values of the inputs at that
* location (or, optionally, the result of a custom function applied to those
* arrays).
*
* @param batchSize The number of elements desired per batch.
* @param smallLastBatch Whether to emit the final batch when it has fewer
* than batchSize elements. Default true.
* @param zipFn: (optional) A function that expects an array of elements at a
* single node of the object tree, and returns a `DeepMapResult`. The
* `DeepMapResult` either provides a result value for that node (i.e.,
* representing the subtree), or indicates that the node should be processed
* recursively. The default zipFn recurses as far as possible and places
* arrays at the leaves.
* @returns A `LazyIterator` of batches of elements, represented as an object
* with collections at the leaves.
*/
columnMajorBatch(batchSize: number, smallLastBatch?: boolean, zipFn?: (xs: any[]) => DeepMapResult): LazyIterator<DataElement>;
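A sketch of the default (arrays-at-the-leaves) behavior:
```js
const rows = tf.data.iteratorFromItems([
  {a: 1, b: 10},
  {a: 2, b: 20},
  {a: 3, b: 30},
]);
// With the default zipFn, values are collected into arrays at the leaves.
const columns = rows.columnMajorBatch(3);
await columns.forEach(c => console.log(c));  // {a: [1, 2, 3], b: [10, 20, 30]}
```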
/**
* Concatenate this `LazyIterator` with another.
*
* @param iterator A `LazyIterator` to be concatenated onto this one.
* @param baseErrorHandler An optional function that can intercept `Error`s
* raised during a `next()` call on the base stream. This function can
* decide whether the error should be propagated, whether the error should
* be ignored, or whether the base stream should be terminated.
* @returns A `LazyIterator`.
*/
concatenate(iterator: LazyIterator<T>, baseErrorHandler?: (e: Error) => boolean): LazyIterator<T>;
/**
* Limits this stream to return at most `count` items.
*
* @param count The maximum number of items to provide from the stream. If
* a negative or undefined value is given, the entire stream is returned
* unaltered.
*/
take(count: number): LazyIterator<T>;
/**
* Skips the first `count` items in this stream.
*
* @param count The number of items to skip. If a negative or undefined
* value is given, the entire stream is returned unaltered.
*/
skip(count: number): LazyIterator<T>;
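A sketch combining `skip` and `take` on an unbounded stream:
```js
const iter = tf.data.iteratorFromIncrementing(0).skip(5).take(3);
await iter.forEach(e => console.log(e));  // 5, 6, 7
```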
/**
* Prefetch the first `bufferSize` items in this stream.
*
* Note this prefetches Promises, but makes no guarantees about when those
* Promises resolve.
*
* @param bufferSize: An integer specifying the number of elements to be
* prefetched.
*/
prefetch(bufferSize: number): LazyIterator<T>;
/**
* Randomly shuffles the elements of this stream.
*
 * @param windowSize: An integer specifying the number of elements from
 * this stream from which the new stream will sample.
 * @param seed: (Optional.) A string specifying the random seed that
 * will be used to create the distribution.
*/
shuffle(windowSize: number, seed?: string): LazyIterator<T>;
/**
* Force an iterator to execute serially: each next() call will await the
* prior one, so that they cannot execute concurrently.
*/
serial(): LazyIterator<T>;
}
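A sketch combining the buffering transforms above; note that `seed` is a string here, and shuffle quality improves with the window size:
```js
const iter = tf.data.iteratorFromIncrementing(0)
    .take(10)
    .shuffle(4, 'my-seed')  // sliding-window shuffle over 4 elements
    .prefetch(2)            // keep two outstanding Promises in flight
    .serial();              // force next() calls to resolve in order
await iter.forEach(e => console.log(e));
```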
/**
* A base class for transforming streams that operate by maintaining an
* output queue of elements that are ready to return via next(). This is
* commonly required when the transformation is 1-to-many: A call to next()
* may trigger a call to the underlying stream, which will produce many
* mapped elements of this stream-- of which we need to return only one, so
* we have to queue the rest.
*/
export declare abstract class OneToManyIterator<T> extends LazyIterator<T> {

@@ -38,5 +336,26 @@ private lastRead;

next(): Promise<IteratorResult<T>>;
/**
* Read one or more chunks from upstream and process them, possibly
* reading or writing a carryover, and adding processed items to the
* output queue. Note it's possible that no items are added to the queue
* on a given pump() call, even if the upstream stream is not closed
* (e.g., because items are filtered).
*
* @return `true` if any action was taken, i.e. fetching items from the
* upstream source OR adding items to the output queue. `false` if the
* upstream source is exhausted AND nothing was added to the queue
 * (not even any remaining carryover).
*/
protected abstract pump(): Promise<boolean>;
serialNext(): Promise<IteratorResult<T>>;
}
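As an illustration, a hypothetical subclass (not part of the package) that splits upstream strings into words; it relies on the protected `outputQueue` used by the built-in transforms:
```js
class WordIterator extends OneToManyIterator {
  constructor(upstream) {
    super();
    this.upstream = upstream;
  }
  summary() {
    return `${this.upstream.summary()} -> Words`;
  }
  async pump() {
    const chunk = await this.upstream.next();
    if (chunk.done) {
      return false;  // upstream exhausted and nothing was queued
    }
    for (const word of chunk.value.split(' ')) {
      this.outputQueue.push(word);  // queue pieces for later next() calls
    }
    return true;  // items were added to the output queue
  }
}
```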
/**
* Provides a `LazyIterator` that concatenates a stream of underlying
* streams.
*
* Doing this in a concurrency-safe way requires some trickery. In
* particular, we want this stream to return the elements from the
* underlying streams in the correct order according to when next() was
* called, even if the resulting Promises resolve in a different order.
*/
export declare class ChainedIterator<T> extends LazyIterator<T> {

@@ -57,2 +376,9 @@ private readonly baseErrorHandler?;

}
/**
* A stream that prefetches a given number of items from an upstream source,
* returning them in FIFO order.
*
* Note this prefetches Promises, but makes no guarantees about when those
* Promises resolve.
*/
export declare class PrefetchIterator<T> extends LazyIterator<T> {

@@ -64,5 +390,15 @@ protected upstream: LazyIterator<T>;

summary(): string;
/**
* Refill the prefetch buffer. Returns only after the buffer is full, or
* the upstream source is exhausted.
*/
protected refill(): void;
next(): Promise<IteratorResult<T>>;
}
/**
* A stream that performs a sliding-window random shuffle on an upstream
* source. This is like a `PrefetchIterator` except that the items are
* returned in randomized order. Mixing naturally improves as the buffer
* size increases.
*/
export declare class ShuffleIterator<T> extends PrefetchIterator<T> {

@@ -69,0 +405,0 @@ protected upstream: LazyIterator<T>;

"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __extends = (this && this.__extends) || (function () {

@@ -54,2 +71,8 @@ var extendStatics = Object.setPrototypeOf ||

var ring_buffer_1 = require("../util/ring_buffer");
// Here we implement a simple asynchronous iterator.
// This lets us avoid using either third-party stream libraries or
// recent TypeScript language support requiring polyfills.
/**
* Create a `LazyIterator` from an array of items.
*/
function iteratorFromItems(items) {

@@ -59,2 +82,5 @@ return new ArrayIterator(items);

exports.iteratorFromItems = iteratorFromItems;
/**
* Create a `LazyIterator` of incrementing integers.
*/
function iteratorFromIncrementing(start) {

@@ -65,2 +91,15 @@ var i = start;

exports.iteratorFromIncrementing = iteratorFromIncrementing;
/**
* Create a `LazyIterator` from a function.
*
* ```js
* let i = -1;
* const func = () =>
* ++i < 5 ? {value: i, done: false} : {value: null, done: true};
* const iter = tf.data.iteratorFromFunction(func);
* await iter.forEach(e => console.log(e));
* ```
*
* @param func A function that produces data on each call.
*/
function iteratorFromFunction(func) {

@@ -70,2 +109,14 @@ return new FunctionCallIterator(func);

exports.iteratorFromFunction = iteratorFromFunction;
/**
* Create a `LazyIterator` by concatenating underlying streams, which are
* themselves provided as a stream.
*
* This can also be thought of as a "stream flatten" operation.
*
* @param baseIterators A stream of streams to be concatenated.
* @param baseErrorHandler An optional function that can intercept `Error`s
* raised during a `next()` call on the base stream. This function can decide
* whether the error should be propagated, whether the error should be
* ignored, or whether the base stream should be terminated.
*/
function iteratorFromConcatenated(baseIterators, baseErrorHandler) {

@@ -75,2 +126,18 @@ return new ChainedIterator(baseIterators, baseErrorHandler);

exports.iteratorFromConcatenated = iteratorFromConcatenated;
/**
* Create a `LazyIterator` by concatenating streams produced by calling a
* stream-generating function a given number of times.
*
* Since a `LazyIterator` is read-once, it cannot be repeated, but this
* function can be used to achieve a similar effect:
*
 *   iteratorFromConcatenatedFunction(
 *       () => ({value: new MyIterator(), done: false}), 6);
*
* @param iteratorFunc: A function that produces a new stream on each call.
* @param count: The number of times to call the function.
* @param baseErrorHandler An optional function that can intercept `Error`s
* raised during a `next()` call on the base stream. This function can decide
* whether the error should be propagated, whether the error should be
* ignored, or whether the base stream should be terminated.
*/
function iteratorFromConcatenatedFunction(iteratorFunc, count, baseErrorHandler) {

@@ -80,2 +147,26 @@ return iteratorFromConcatenated(iteratorFromFunction(iteratorFunc).take(count), baseErrorHandler);

exports.iteratorFromConcatenatedFunction = iteratorFromConcatenatedFunction;
/**
* Create a `LazyIterator` by zipping together an array, dict, or nested
* structure of `LazyIterator`s (and perhaps additional constants).
*
* The underlying streams must provide elements in a consistent order such
* that they correspond.
*
* Typically, the underlying streams should have the same number of
* elements. If they do not, the behavior is determined by the
* `mismatchMode` argument.
*
* The nested structure of the `iterators` argument determines the
* structure of elements in the resulting iterator.
*
* @param iterators: An array or object containing LazyIterators at the
* leaves.
* @param mismatchMode: Determines what to do when one underlying iterator
* is exhausted before the others. `ZipMismatchMode.FAIL` (the default)
* causes an error to be thrown in this case. `ZipMismatchMode.SHORTEST`
 * causes the zipped iterator to terminate when the first underlying stream
 * is exhausted, so elements remaining on the longer streams are ignored.
* `ZipMismatchMode.LONGEST` causes the zipped stream to continue, filling
* in nulls for the exhausted streams, until all streams are exhausted.
*/
function iteratorFromZipped(iterators, mismatchMode) {

@@ -86,5 +177,27 @@ if (mismatchMode === void 0) { mismatchMode = ZipMismatchMode.FAIL; }

exports.iteratorFromZipped = iteratorFromZipped;
var LazyIterator = (function () {
/**
* An asynchronous iterator, providing lazy access to a potentially
* unbounded stream of elements.
*
 * An iterator can be obtained from a dataset:
* `const iter = await dataset.iterator();`
*/
var LazyIterator = /** @class */ (function () {
function LazyIterator() {
}
/**
* Collect all remaining elements of a bounded stream into an array.
* Obviously this will succeed only for small streams that fit in memory.
* Useful for testing.
*
* @param maxItems the maximum number of items to return. If the stream
* terminates, fewer items will be returned. (default 1000)
* @param prefetch the size of the prefetch buffer to use when collecting
* items. Some amount of prefetch is important to test parallel streams,
* i.e. with multiple Promises outstanding. Without prefetch, this method
* makes purely serial next() calls.
*
* @returns A Promise for an array of stream elements, which will resolve
* when the stream is exhausted.
*/
LazyIterator.prototype.collect = function (maxItems, prefetch) {

@@ -101,3 +214,3 @@ if (maxItems === void 0) { maxItems = 1000; }

count = 0;
return [4, stream.next()];
return [4 /*yield*/, stream.next()];
case 1:

@@ -107,13 +220,13 @@ x = _a.sent();

case 2:
if (!!x.done) return [3, 4];
if (!!x.done) return [3 /*break*/, 4];
result.push(x.value);
count++;
if (count >= maxItems) {
return [2, result];
return [2 /*return*/, result];
}
return [4, stream.next()];
return [4 /*yield*/, stream.next()];
case 3:
x = _a.sent();
return [3, 2];
case 4: return [2, result];
return [3 /*break*/, 2];
case 4: return [2 /*return*/, result];
}

@@ -123,2 +236,9 @@ });

};
/**
* Draw items from the stream until it is exhausted.
*
* This can be useful when the stream has side effects but no output. In
* that case, calling this function guarantees that the stream will be
* fully processed.
*/
LazyIterator.prototype.resolveFully = function () {

@@ -129,3 +249,3 @@ return __awaiter(this, void 0, void 0, function () {

switch (_a.label) {
case 0: return [4, this.next()];
case 0: return [4 /*yield*/, this.next()];
case 1:

@@ -135,8 +255,8 @@ x = _a.sent();

case 2:
if (!!x.done) return [3, 4];
return [4, this.next()];
if (!!x.done) return [3 /*break*/, 4];
return [4 /*yield*/, this.next()];
case 3:
x = _a.sent();
return [3, 2];
case 4: return [2];
return [3 /*break*/, 2];
case 4: return [2 /*return*/];
}

@@ -146,2 +266,9 @@ });

};
/**
* Draw items from the stream until it is exhausted, or a predicate fails.
*
* This can be useful when the stream has side effects but no output. In
* that case, calling this function guarantees that the stream will be
 * processed until the predicate first fails or the stream is exhausted.
*/
LazyIterator.prototype.resolveWhile = function (predicate) {

@@ -152,3 +279,3 @@ return __awaiter(this, void 0, void 0, function () {

switch (_a.label) {
case 0: return [4, this.next()];
case 0: return [4 /*yield*/, this.next()];
case 1:

@@ -159,9 +286,9 @@ x = _a.sent();

case 2:
if (!((!x.done) && shouldContinue)) return [3, 4];
return [4, this.next()];
if (!((!x.done) && shouldContinue)) return [3 /*break*/, 4];
return [4 /*yield*/, this.next()];
case 3:
x = _a.sent();
shouldContinue = predicate(x.value);
return [3, 2];
case 4: return [2];
return [3 /*break*/, 2];
case 4: return [2 /*return*/];
}

@@ -171,34 +298,117 @@ });

};
/**
* Handles errors thrown on this stream using a provided handler function.
*
* @param handler A function that handles any `Error` thrown during a `next()`
* call and returns true if the stream should continue (dropping the failed
* call) or false if the stream should quietly terminate. If the handler
* itself throws (or rethrows) an `Error`, that will be propagated.
*
* @returns A `LazyIterator` of elements passed through from upstream,
* possibly filtering or terminating on upstream `next()` calls that
* throw an `Error`.
*/
LazyIterator.prototype.handleErrors = function (handler) {
return new ErrorHandlingLazyIterator(this, handler);
};
// TODO(soergel): Implement reduce() etc.
/**
* Filters this stream according to `predicate`.
*
* @param predicate A function mapping a stream element to a boolean or a
* `Promise` for one.
*
* @returns A `LazyIterator` of elements for which the predicate was true.
*/
LazyIterator.prototype.filter = function (predicate) {
return new FilterIterator(this, predicate);
};
/**
* Maps this stream through a 1-to-1 transform.
*
* @param transform A function mapping a stream element to a transformed
* element.
*
* @returns A `LazyIterator` of transformed elements.
*/
LazyIterator.prototype.map = function (transform) {
return new MapIterator(this, transform);
};
/**
* Maps this stream through an async 1-to-1 transform.
*
* @param transform A function mapping a stream element to a `Promise` for a
* transformed stream element.
*
* @returns A `LazyIterator` of transformed elements.
*/
LazyIterator.prototype.mapAsync = function (transform) {
return new AsyncMapIterator(this, transform);
};
/**
* Maps this stream through a 1-to-1 transform, forcing serial execution.
*
 * @param transform A function mapping a stream element to a `Promise` for a
 * transformed element.
*
* @returns A `LazyIterator` of transformed elements.
*/
LazyIterator.prototype.serialMapAsync = function (transform) {
return new AsyncMapIterator(this, transform).serial();
};
/**
* Maps this stream through a 1-to-many transform.
*
* @param transform A function mapping a stream element to an array of
* transformed elements.
*
 * @returns A `LazyIterator` of transformed elements.
*/
LazyIterator.prototype.flatmap = function (transform) {
return new FlatmapIterator(this, transform);
};
/**
* Apply a function to every element of the stream.
*
* @param f A function to apply to each stream element.
*/
LazyIterator.prototype.forEach = function (f) {
return __awaiter(this, void 0, void 0, function () {
return __generator(this, function (_a) {
return [2, this.map(f).resolveFully()];
return [2 /*return*/, this.map(f).resolveFully()];
});
});
};
/**
* Apply a function to every element of the stream, forcing serial execution.
*
* @param f A function to apply to each stream element. Should return 'true'
* to indicate that the stream should continue, or 'false' to cause it to
* terminate.
*/
LazyIterator.prototype.serialForEach = function (f) {
return __awaiter(this, void 0, void 0, function () {
return __generator(this, function (_a) {
return [2, this.serialMapAsync(f).resolveWhile(function (x) { return (x === true); })];
return [2 /*return*/, this.serialMapAsync(f).resolveWhile(function (x) { return (x === true); })];
});
});
};
/**
* Groups elements into batches, represented as arrays of elements.
*
* We can think of the elements of this iterator as 'rows' (even if they are
* nested structures). By the same token, consecutive values for a given
* key within the elements form a 'column'. This matches the usual sense of
* 'row' and 'column' when processing tabular data (e.g., parsing a CSV).
*
* Thus, "Row-major" means that the resulting batch is simply a collection of
* rows: `[row1, row2, row3, ...]`. This is contrast to the column-major
* form, which is needed for vectorized computation.
*
* @param batchSize The number of elements desired per batch.
* @param smallLastBatch Whether to emit the final batch when it has fewer
* than batchSize elements. Default true.
* @returns A `LazyIterator` of batches of elements, represented as arrays
* of the original element type.
*/
LazyIterator.prototype.rowMajorBatch = function (batchSize, smallLastBatch) {

@@ -208,11 +418,67 @@ if (smallLastBatch === void 0) { smallLastBatch = true; }

};
LazyIterator.prototype.columnMajorBatch = function (batchSize, smallLastBatch, zipFn) {
/**
* Groups elements into batches, represented in column-major form.
*
* We can think of the elements of this iterator as 'rows' (even if they are
* nested structures). By the same token, consecutive values for a given
* key within the elements form a 'column'. This matches the usual sense of
* 'row' and 'column' when processing tabular data (e.g., parsing a CSV).
*
* Thus, "column-major" means that the resulting batch is a (potentially
* nested) structure representing the columns. Each column entry, then,
* contains a collection of the values found in that column for a range of
* input elements. This representation allows for vectorized computation, in
* contrast to the row-major form.
*
* The inputs should all have the same nested structure (i.e., of arrays and
* dicts). The result is a single object with the same nested structure,
* where the leaves are arrays collecting the values of the inputs at that
* location (or, optionally, the result of a custom function applied to those
* arrays).
*
* @param batchSize The number of elements desired per batch.
* @param smallLastBatch Whether to emit the final batch when it has fewer
* than batchSize elements. Default true.
* @param zipFn: (optional) A function that expects an array of elements at a
* single node of the object tree, and returns a `DeepMapResult`. The
* `DeepMapResult` either provides a result value for that node (i.e.,
* representing the subtree), or indicates that the node should be processed
* recursively. The default zipFn recurses as far as possible and places
* arrays at the leaves.
* @returns A `LazyIterator` of batches of elements, represented as an object
* with collections at the leaves.
*/
LazyIterator.prototype.columnMajorBatch = function (batchSize, smallLastBatch,
// tslint:disable-next-line:no-any
zipFn) {
if (smallLastBatch === void 0) { smallLastBatch = true; }
if (zipFn === void 0) { zipFn = deep_map_1.zipToList; }
if (zipFn === void 0) {
// tslint:disable-next-line:no-any
zipFn = deep_map_1.zipToList; }
// First collect the desired number of input elements as a row-major batch.
var rowBatches = this.rowMajorBatch(batchSize, smallLastBatch);
// Now 'rotate' or 'pivot' the data, collecting all values from each column
// in the batch (i.e., for each key within the elements) into an array.
return rowBatches.map(function (x) { return deep_map_1.deepZip(x, zipFn); });
};
/**
* Concatenate this `LazyIterator` with another.
*
* @param iterator A `LazyIterator` to be concatenated onto this one.
* @param baseErrorHandler An optional function that can intercept `Error`s
* raised during a `next()` call on the base stream. This function can
* decide whether the error should be propagated, whether the error should
* be ignored, or whether the base stream should be terminated.
* @returns A `LazyIterator`.
*/
LazyIterator.prototype.concatenate = function (iterator, baseErrorHandler) {
return new ChainedIterator(iteratorFromItems([this, iterator]), baseErrorHandler);
};
/**
* Limits this stream to return at most `count` items.
*
* @param count The maximum number of items to provide from the stream. If
* a negative or undefined value is given, the entire stream is returned
* unaltered.
*/
LazyIterator.prototype.take = function (count) {

@@ -224,2 +490,8 @@ if (count < 0 || count == null) {

};
/**
* Skips the first `count` items in this stream.
*
* @param count The number of items to skip. If a negative or undefined
* value is given, the entire stream is returned unaltered.
*/
LazyIterator.prototype.skip = function (count) {

@@ -231,8 +503,30 @@ if (count < 0 || count == null) {

};
/**
* Prefetch the first `bufferSize` items in this stream.
*
* Note this prefetches Promises, but makes no guarantees about when those
* Promises resolve.
*
* @param bufferSize: An integer specifying the number of elements to be
* prefetched.
*/
LazyIterator.prototype.prefetch = function (bufferSize) {
return new PrefetchIterator(this, bufferSize);
};
// TODO(soergel): deep sharded shuffle, where supported
/**
* Randomly shuffles the elements of this stream.
*
 * @param windowSize: An integer specifying the number of elements from
 * this stream from which the new stream will sample.
 * @param seed: (Optional.) A string specifying the random seed that
 * will be used to create the distribution.
*/
LazyIterator.prototype.shuffle = function (windowSize, seed) {
return new ShuffleIterator(this, windowSize, seed);
};
/**
* Force an iterator to execute serially: each next() call will await the
* prior one, so that they cannot execute concurrently.
*/
LazyIterator.prototype.serial = function () {

@@ -244,3 +538,10 @@ return new SerialIterator(this);

exports.LazyIterator = LazyIterator;
var ArrayIterator = (function (_super) {
// ============================================================================
// The following private classes serve to implement the chainable methods
// on LazyIterator. Unfortunately they can't be placed in separate files,
// due to resulting trouble with circular imports.
// ============================================================================
// Iterators that just extend LazyIterator directly
// ============================================================================
var ArrayIterator = /** @class */ (function (_super) {
__extends(ArrayIterator, _super);

@@ -261,3 +562,3 @@ function ArrayIterator(items) {

if (this.trav >= this.items.length) {
return [2, { value: null, done: true }];
return [2 /*return*/, { value: null, done: true }];
}

@@ -272,3 +573,3 @@ item = this.items[this.trav];

this.trav++;
return [2, { value: result, done: false }];
return [2 /*return*/, { value: result, done: false }];
});

@@ -279,3 +580,3 @@ });

}(LazyIterator));
var FunctionCallIterator = (function (_super) {
var FunctionCallIterator = /** @class */ (function (_super) {
__extends(FunctionCallIterator, _super);

@@ -294,5 +595,6 @@ function FunctionCallIterator(nextFn) {

try {
return [2, this.nextFn()];
return [2 /*return*/, this.nextFn()];
}
catch (e) {
// Modify the error message but leave the stack trace intact
e.message =

@@ -302,3 +604,3 @@ "Error thrown while iterating through a dataset: " + e.message;

}
return [2];
return [2 /*return*/];
});

@@ -309,3 +611,3 @@ });

}(LazyIterator));
var SerialIterator = (function (_super) {
var SerialIterator = /** @class */ (function (_super) {
__extends(SerialIterator, _super);

@@ -325,4 +627,8 @@ function SerialIterator(upstream) {

return __generator(this, function (_a) {
// This sets this.lastRead to a new Promise right away, as opposed to
// saying `await this.lastRead; this.lastRead = this.serialNext();` which
// would not work because this.nextRead would be updated only after the
// promise resolves.
this.lastRead = this.lastRead.then(function () { return _this.serialNext(); });
return [2, this.lastRead];
return [2 /*return*/, this.lastRead];
});

@@ -334,3 +640,3 @@ });

return __generator(this, function (_a) {
return [2, this.upstream.next()];
return [2 /*return*/, this.upstream.next()];
});

@@ -341,3 +647,3 @@ });

}(LazyIterator));
var SkipIterator = (function (_super) {
var SkipIterator = /** @class */ (function (_super) {
__extends(SkipIterator, _super);

@@ -348,2 +654,3 @@ function SkipIterator(upstream, maxCount) {

_this.maxCount = maxCount;
// Local state that should not be clobbered by out-of-order execution.
_this.count = 0;

@@ -360,4 +667,8 @@ _this.lastRead = Promise.resolve({ value: null, done: false });

return __generator(this, function (_a) {
// This sets this.lastRead to a new Promise right away, as opposed to
// saying `await this.lastRead; this.lastRead = this.serialNext();` which
// would not work because this.nextRead would be updated only after the
// promise resolves.
this.lastRead = this.lastRead.then(function () { return _this.serialNext(); });
return [2, this.lastRead];
return [2 /*return*/, this.lastRead];
});

@@ -372,12 +683,13 @@ });

case 0:
if (!(this.count++ < this.maxCount)) return [3, 2];
return [4, this.upstream.next()];
if (!(this.count++ < this.maxCount)) return [3 /*break*/, 2];
return [4 /*yield*/, this.upstream.next()];
case 1:
skipped = _a.sent();
// short-circuit if upstream is already empty
if (skipped.done) {
return [2, skipped];
return [2 /*return*/, skipped];
}
tf.dispose(skipped.value);
return [3, 0];
case 2: return [2, this.upstream.next()];
return [3 /*break*/, 0];
case 2: return [2 /*return*/, this.upstream.next()];
}

@@ -389,3 +701,3 @@ });

}(LazyIterator));
var TakeIterator = (function (_super) {
var TakeIterator = /** @class */ (function (_super) {
__extends(TakeIterator, _super);

@@ -406,5 +718,5 @@ function TakeIterator(upstream, maxCount) {

if (this.count++ >= this.maxCount) {
return [2, { value: null, done: true }];
return [2 /*return*/, { value: null, done: true }];
}
return [2, this.upstream.next()];
return [2 /*return*/, this.upstream.next()];
});

@@ -415,3 +727,6 @@ });

}(LazyIterator));
var RowMajorBatchIterator = (function (_super) {
// Note this batch just groups items into row-wise element arrays.
// Rotating these to a column-wise representation happens only at the dataset
// level.
var RowMajorBatchIterator = /** @class */ (function (_super) {
__extends(RowMajorBatchIterator, _super);

@@ -434,4 +749,8 @@ function RowMajorBatchIterator(upstream, batchSize, enableSmallLastBatch) {

return __generator(this, function (_a) {
// This sets this.lastRead to a new Promise right away, as opposed to
// saying `await this.lastRead; this.lastRead = this.serialNext();` which
// would not work because this.nextRead would be updated only after the
// promise resolves.
this.lastRead = this.lastRead.then(function () { return _this.serialNext(); });
return [2, this.lastRead];
return [2 /*return*/, this.lastRead];
});

@@ -449,4 +768,4 @@ });

case 1:
if (!(batch.length < this.batchSize)) return [3, 3];
return [4, this.upstream.next()];
if (!(batch.length < this.batchSize)) return [3 /*break*/, 3];
return [4 /*yield*/, this.upstream.next()];
case 2:

@@ -456,9 +775,9 @@ item = _a.sent();

if (this.enableSmallLastBatch && batch.length > 0) {
return [2, { value: batch, done: false }];
return [2 /*return*/, { value: batch, done: false }];
}
return [2, { value: null, done: true }];
return [2 /*return*/, { value: null, done: true }];
}
batch.push(item.value);
return [3, 1];
case 3: return [2, { value: batch, done: false }];
return [3 /*break*/, 1];
case 3: return [2 /*return*/, { value: batch, done: false }];
}

@@ -470,3 +789,3 @@ });

}(LazyIterator));
var FilterIterator = (function (_super) {
var FilterIterator = /** @class */ (function (_super) {
__extends(FilterIterator, _super);

@@ -487,4 +806,8 @@ function FilterIterator(upstream, predicate) {

return __generator(this, function (_a) {
// This sets this.lastRead to a new Promise right away, as opposed to
// saying `await this.lastRead; this.lastRead = this.serialNext();` which
// would not work because this.nextRead would be updated only after the
// promise resolves.
this.lastRead = this.lastRead.then(function () { return _this.serialNext(); });
return [2, this.lastRead];
return [2 /*return*/, this.lastRead];
});

@@ -499,12 +822,12 @@ });

case 0:
if (!true) return [3, 2];
return [4, this.upstream.next()];
if (!true) return [3 /*break*/, 2];
return [4 /*yield*/, this.upstream.next()];
case 1:
item = _a.sent();
if (item.done || this.predicate(item.value)) {
return [2, item];
return [2 /*return*/, item];
}
tf.dispose(item.value);
return [3, 0];
case 2: return [2];
return [3 /*break*/, 0];
case 2: return [2 /*return*/];
}

@@ -516,3 +839,3 @@ });

}(LazyIterator));
var MapIterator = (function (_super) {
var MapIterator = /** @class */ (function (_super) {
__extends(MapIterator, _super);

@@ -533,7 +856,7 @@ function MapIterator(upstream, transform) {

switch (_a.label) {
case 0: return [4, this.upstream.next()];
case 0: return [4 /*yield*/, this.upstream.next()];
case 1:
item = _a.sent();
if (item.done) {
return [2, { value: null, done: true }];
return [2 /*return*/, { value: null, done: true }];
}

@@ -543,2 +866,4 @@ inputTensors = tensor_util_1.getTensorsInContainer(item.value);

outputTensors = tensor_util_1.getTensorsInContainer(mapped);
// TODO(soergel) faster intersection
// TODO(soergel) move to tf.disposeExcept(in, out)?
for (_i = 0, inputTensors_1 = inputTensors; _i < inputTensors_1.length; _i++) {

@@ -550,3 +875,3 @@ t = inputTensors_1[_i];

}
return [2, { value: mapped, done: false }];
return [2 /*return*/, { value: mapped, done: false }];
}

@@ -558,3 +883,3 @@ });

}(LazyIterator));
var ErrorHandlingLazyIterator = (function (_super) {
var ErrorHandlingLazyIterator = /** @class */ (function (_super) {
__extends(ErrorHandlingLazyIterator, _super);

@@ -576,4 +901,8 @@ function ErrorHandlingLazyIterator(upstream, handler) {

return __generator(this, function (_a) {
// This sets this.lastRead to a new Promise right away, as opposed to
// saying `await this.lastRead; this.lastRead = this.serialNext();` which
// would not work because this.nextRead would be updated only after the
// promise resolves.
this.lastRead = this.lastRead.then(function () { return _this.serialNext(); });
return [2, this.lastRead];
return [2 /*return*/, this.lastRead];
});

@@ -588,16 +917,16 @@ });

case 0:
if (!true) return [3, 5];
if (!true) return [3 /*break*/, 5];
_a.label = 1;
case 1:
_a.trys.push([1, 3, , 4]);
return [4, this.upstream.next()];
case 2: return [2, _a.sent()];
return [4 /*yield*/, this.upstream.next()];
case 2: return [2 /*return*/, _a.sent()];
case 3:
e_1 = _a.sent();
if (!this.handler(e_1)) {
return [2, { value: null, done: true }];
return [2 /*return*/, { value: null, done: true }];
}
return [3, 4];
case 4: return [3, 0];
case 5: return [2];
return [3 /*break*/, 4];
case 4: return [3 /*break*/, 0];
case 5: return [2 /*return*/];
}

@@ -609,3 +938,3 @@ });

}(LazyIterator));
var AsyncMapIterator = (function (_super) {
var AsyncMapIterator = /** @class */ (function (_super) {
__extends(AsyncMapIterator, _super);

@@ -626,13 +955,15 @@ function AsyncMapIterator(upstream, transform) {

switch (_a.label) {
case 0: return [4, this.upstream.next()];
case 0: return [4 /*yield*/, this.upstream.next()];
case 1:
item = _a.sent();
if (item.done) {
return [2, { value: null, done: true }];
return [2 /*return*/, { value: null, done: true }];
}
inputTensors = tensor_util_1.getTensorsInContainer(item.value);
return [4, this.transform(item.value)];
return [4 /*yield*/, this.transform(item.value)];
case 2:
mapped = _a.sent();
outputTensors = tensor_util_1.getTensorsInContainer(mapped);
// TODO(soergel) faster intersection
// TODO(soergel) move to tf.disposeExcept(in, out)?
for (_i = 0, inputTensors_2 = inputTensors; _i < inputTensors_2.length; _i++) {

@@ -644,3 +975,3 @@ t = inputTensors_2[_i];

}
return [2, { value: mapped, done: false }];
return [2 /*return*/, { value: mapped, done: false }];
}

@@ -652,3 +983,13 @@ });

}(LazyIterator));
var OneToManyIterator = (function (_super) {
// Iterators that maintain a queue of pending items
// ============================================================================
/**
* A base class for transforming streams that operate by maintaining an
* output queue of elements that are ready to return via next(). This is
* commonly required when the transformation is 1-to-many: A call to next()
* may trigger a call to the underlying stream, which will produce many
 * mapped elements of this stream, of which we need to return only one, so
* we have to queue the rest.
*/
var OneToManyIterator = /** @class */ (function (_super) {
__extends(OneToManyIterator, _super);

@@ -665,4 +1006,8 @@ function OneToManyIterator() {

return __generator(this, function (_a) {
// This sets this.lastRead to a new Promise right away, as opposed to
// saying `await this.lastRead; this.lastRead = this.serialNext();` which
// would not work because this.nextRead would be updated only after the
// promise resolves.
this.lastRead = this.lastRead.then(function () { return _this.serialNext(); });
return [2, this.lastRead];
return [2 /*return*/, this.lastRead];
});

@@ -676,10 +1021,11 @@ });

case 0:
if (!(this.outputQueue.length() === 0)) return [3, 2];
return [4, this.pump()];
if (!(this.outputQueue.length() === 0)) return [3 /*break*/, 2];
return [4 /*yield*/, this.pump()];
case 1:
// TODO(soergel): consider parallel reads.
if (!(_a.sent())) {
return [2, { value: null, done: true }];
return [2 /*return*/, { value: null, done: true }];
}
return [3, 0];
case 2: return [2, { value: this.outputQueue.shift(), done: false }];
return [3 /*break*/, 0];
case 2: return [2 /*return*/, { value: this.outputQueue.shift(), done: false }];
}

@@ -692,3 +1038,3 @@ });

exports.OneToManyIterator = OneToManyIterator;
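// A minimal synchronous sketch (not package code) of the pump-and-queue shape
// described above: pump() pulls one upstream item that may expand to zero or
// more outputs, and next() drains the queue, pumping only when it is empty.
class OneToMany<T, U> {
  private queue: U[] = [];
  constructor(private upstream: Iterator<T>,
              private expand: (t: T) => U[]) {}

  next(): {value: U | null, done: boolean} {
    while (this.queue.length === 0) {
      if (!this.pump()) return {value: null, done: true};
    }
    return {value: this.queue.shift()!, done: false};
  }

  private pump(): boolean {
    const r = this.upstream.next();
    if (r.done) return false;                   // upstream exhausted
    this.queue.push(...this.expand(r.value));   // may enqueue 0..n outputs
    return true;
  }
}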
var FlatmapIterator = (function (_super) {
var FlatmapIterator = /** @class */ (function (_super) {
__extends(FlatmapIterator, _super);

@@ -709,7 +1055,7 @@ function FlatmapIterator(upstream, transform) {

switch (_a.label) {
case 0: return [4, this.upstream.next()];
case 0: return [4 /*yield*/, this.upstream.next()];
case 1:
item = _a.sent();
if (item.done) {
return [2, false];
return [2 /*return*/, false];
}

@@ -720,2 +1066,4 @@ inputTensors = tensor_util_1.getTensorsInContainer(item.value);

this.outputQueue.pushAll(mappedArray);
// TODO(soergel) faster intersection, and deduplicate outputTensors
// TODO(soergel) move to tf.disposeExcept(in, out)?
for (_i = 0, inputTensors_3 = inputTensors; _i < inputTensors_3.length; _i++) {

@@ -727,3 +1075,3 @@ t = inputTensors_3[_i];

}
return [2, true];
return [2 /*return*/, true];
}

@@ -735,3 +1083,12 @@ });

}(OneToManyIterator));
var ChainedIterator = (function (_super) {
/**
* Provides a `LazyIterator` that concatenates a stream of underlying
* streams.
*
* Doing this in a concurrency-safe way requires some trickery. In
* particular, we want this stream to return the elements from the
* underlying streams in the correct order according to when next() was
* called, even if the resulting Promises resolve in a different order.
*/
var ChainedIterator = /** @class */ (function (_super) {
__extends(ChainedIterator, _super);

@@ -741,3 +1098,6 @@ function ChainedIterator(iterators, baseErrorHandler) {

_this.baseErrorHandler = baseErrorHandler;
// Strict Promise execution order:
// a next() call may not even begin until the previous one completes.
_this.lastRead = null;
// Local state that should not be clobbered by out-of-order execution.
_this.iterator = null;

@@ -755,3 +1115,3 @@ _this.moreIterators = iterators;

this.lastRead = this.readFromChain(this.lastRead);
return [2, this.lastRead];
return [2 /*return*/, this.lastRead];
});
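// A minimal sketch (not package code) of the concatenation semantics that
// ChainedIterator implements: drain each underlying iterator in turn. A
// native async generator serializes reads for free; the class above has to
// recreate that ordering guarantee by chaining promises by hand.
async function* chain<T>(...iterators: Array<AsyncIterator<T>>) {
  for (const it of iterators) {
    while (true) {
      const r = await it.next();
      if (r.done) break;  // this stream is exhausted; move to the next one
      yield r.value;
    }
  }
}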

@@ -765,11 +1125,23 @@ });

switch (_a.label) {
case 0: return [4, lastRead];
case 0:
// Must await on the previous read since the previous read may have advanced
// the stream of streams, from which we need to read.
// This is unfortunate since we can't parallelize reads, which means
// prefetching of chained streams is a no-op.
// One solution is to prefetch immediately upstream of this.
return [4 /*yield*/, lastRead];
case 1:
// Must await on the previous read since the previous read may have advanced
// the stream of streams, from which we need to read.
// This is unfortunate since we can't parallelize reads, which means
// prefetching of chained streams is a no-op.
// One solution is to prefetch immediately upstream of this.
_a.sent();
if (!(this.iterator == null)) return [3, 3];
return [4, this.moreIterators.next()];
if (!(this.iterator == null)) return [3 /*break*/, 3];
return [4 /*yield*/, this.moreIterators.next()];
case 2:
iteratorResult = _a.sent();
if (iteratorResult.done) {
return [2, { value: null, done: true }];
// No more streams to stream from.
return [2 /*return*/, { value: null, done: true }];
}

@@ -781,3 +1153,3 @@ this.iterator = iteratorResult.value;

_a.label = 3;
case 3: return [4, this.iterator.next()];
case 3: return [4 /*yield*/, this.iterator.next()];
case 4:

@@ -787,5 +1159,5 @@ itemResult = _a.sent();

this.iterator = null;
return [2, this.readFromChain(lastRead)];
return [2 /*return*/, this.readFromChain(lastRead)];
}
return [2, itemResult];
return [2 /*return*/, itemResult];
}

@@ -802,5 +1174,34 @@ });

ZipMismatchMode[ZipMismatchMode["SHORTEST"] = 1] = "SHORTEST";
ZipMismatchMode[ZipMismatchMode["LONGEST"] = 2] = "LONGEST";
ZipMismatchMode[ZipMismatchMode["LONGEST"] = 2] = "LONGEST"; // use nulls for exhausted streams; use up the longest stream.
})(ZipMismatchMode = exports.ZipMismatchMode || (exports.ZipMismatchMode = {}));
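// A minimal array-based sketch (not package code) of how the three
// ZipMismatchMode values behave when the zipped streams have unequal lengths.
function zipArrays<A, B>(xs: A[], ys: B[],
                         mode: 'FAIL' | 'SHORTEST' | 'LONGEST') {
  const out: Array<[A | null, B | null]> = [];
  for (let i = 0; i < Math.max(xs.length, ys.length); i++) {
    const xDone = i >= xs.length;
    const yDone = i >= ys.length;
    if (xDone || yDone) {
      if (mode === 'FAIL') {
        throw new Error('Zipped streams have unequal lengths.');
      }
      if (mode === 'SHORTEST') break;
      // LONGEST: keep going, filling exhausted streams with null.
    }
    out.push([xDone ? null : xs[i], yDone ? null : ys[i]]);
  }
  return out;
}
// zipArrays([1, 2, 3], ['a', 'b'], 'LONGEST')
//   -> [[1, 'a'], [2, 'b'], [3, null]]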
var ZipIterator = (function (_super) {
/**
* Provides a `LazyIterator` that zips together an array, dict, or nested
* structure of `LazyIterator`s (and perhaps additional constants).
*
* The underlying streams must provide elements in a consistent order such
* that they correspond.
*
* Typically, the underlying streams should have the same number of
* elements. If they do not, the behavior is determined by the
* `mismatchMode` argument.
*
* The nested structure of the `iterators` argument determines the
* structure of elements in the resulting iterator.
*
* Doing this in a concurrency-safe way requires some trickery. In
* particular, we want this stream to return the elements from the
* underlying streams in the correct order according to when next() was
* called, even if the resulting Promises resolve in a different order.
*
* @param iterators: An array or object containing LazyIterators at the
* leaves.
* @param mismatchMode: Determines what to do when one underlying iterator
* is exhausted before the others. `ZipMismatchMode.FAIL` (the default)
* causes an error to be thrown in this case. `ZipMismatchMode.SHORTEST`
 *   causes the zipped iterator to terminate with the first exhausted
 *   underlying stream, so elements remaining on the longer streams are ignored.
* `ZipMismatchMode.LONGEST` causes the zipped stream to continue, filling
* in nulls for the exhausted streams, until all streams are exhausted.
*/
var ZipIterator = /** @class */ (function (_super) {
__extends(ZipIterator, _super);

@@ -843,12 +1244,18 @@ function ZipIterator(iterators, mismatchMode) {

switch (_a.label) {
case 0: return [4, afterState];
case 0:
// This chaining ensures that the underlying next() calls are not even made
// before the previous ones have resolved.
return [4 /*yield*/, afterState];
case 1:
// This chaining ensures that the underlying next() calls are not even made
// before the previous ones have resolved.
_a.sent();
numIterators = 0;
iteratorsDone = 0;
return [4, deep_map_1.deepMapAndAwaitAll(this.iterators, getNext)];
return [4 /*yield*/, deep_map_1.deepMapAndAwaitAll(this.iterators, getNext)];
case 2:
mapped = _a.sent();
if (numIterators === iteratorsDone) {
return [2, { value: null, done: true }];
// The streams have all ended.
return [2 /*return*/, { value: null, done: true }];
}

@@ -861,9 +1268,10 @@ if (iteratorsDone > 0) {

case ZipMismatchMode.SHORTEST:
return [2, { value: null, done: true }];
return [2 /*return*/, { value: null, done: true }];
case ZipMismatchMode.LONGEST:
default:
// Continue. The exhausted streams already produced value: null.
}
}
this.count++;
return [2, { value: mapped, done: false }];
return [2 /*return*/, { value: mapped, done: false }];
}

@@ -879,4 +1287,4 @@ });

this.currentPromise = this.nextState(this.currentPromise);
return [4, this.currentPromise];
case 1: return [2, (_a.sent())];
return [4 /*yield*/, this.currentPromise];
case 1: return [2 /*return*/, (_a.sent())];
}

@@ -888,3 +1296,12 @@ });

}(LazyIterator));
var PrefetchIterator = (function (_super) {
// Iterators that maintain a ring buffer of pending promises
// ============================================================================
/**
* A stream that prefetches a given number of items from an upstream source,
* returning them in FIFO order.
*
* Note this prefetches Promises, but makes no guarantees about when those
* Promises resolve.
*/
var PrefetchIterator = /** @class */ (function (_super) {
__extends(PrefetchIterator, _super);

@@ -901,2 +1318,6 @@ function PrefetchIterator(upstream, bufferSize) {

};
/**
* Refill the prefetch buffer. Returns only after the buffer is full, or
* the upstream source is exhausted.
*/
PrefetchIterator.prototype.refill = function () {

@@ -910,2 +1331,5 @@ while (!this.buffer.isFull()) {

this.refill();
// This shift will never throw an error because the buffer is always
// full after a refill. If the stream is exhausted, the buffer will be
// full of Promises that will resolve to the end-of-stream signal.
return this.buffer.shift();

@@ -916,3 +1340,9 @@ };

exports.PrefetchIterator = PrefetchIterator;
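// A minimal sketch (not package code) of the prefetch pattern: promises for
// upcoming items are requested eagerly and handed out in FIFO order. Only the
// promises are buffered; nothing here forces them to resolve early.
class Prefetcher<T> {
  private buffer: Array<Promise<IteratorResult<T>>> = [];
  constructor(private upstream: AsyncIterator<T>,
              private bufferSize: number) {}

  next(): Promise<IteratorResult<T>> {
    // Refill: issue upstream reads until the buffer is full again.
    while (this.buffer.length < this.bufferSize) {
      this.buffer.push(this.upstream.next());
    }
    return this.buffer.shift()!;  // oldest pending promise first
  }
}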
var ShuffleIterator = (function (_super) {
/**
* A stream that performs a sliding-window random shuffle on an upstream
* source. This is like a `PrefetchIterator` except that the items are
* returned in randomized order. Mixing naturally improves as the buffer
* size increases.
*/
var ShuffleIterator = /** @class */ (function (_super) {
__extends(ShuffleIterator, _super);

@@ -923,2 +1353,3 @@ function ShuffleIterator(upstream, windowSize, seed) {

_this.windowSize = windowSize;
// Local state that should not be clobbered by out-of-order execution.
_this.upstreamExhausted = false;

@@ -933,4 +1364,8 @@ _this.random = seedrandom.alea(seed || tf.util.now().toString());

return __generator(this, function (_a) {
// This sets this.lastRead to a new Promise right away, as opposed to
// saying `await this.lastRead; this.lastRead = this.serialNext();` which
// would not work because this.nextRead would be updated only after the
// promise resolves.
this.lastRead = this.lastRead.then(function () { return _this.serialNext(); });
return [2, this.lastRead];
return [2 /*return*/, this.lastRead];
});
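// A minimal synchronous sketch (not package code) of the sliding-window
// shuffle described above: keep a window of up to `windowSize` items full,
// emit a uniformly random element from it, and backfill from upstream.
function* windowShuffle<T>(items: Iterable<T>,
                           windowSize: number): IterableIterator<T> {
  const window: T[] = [];
  // Remove and return a random element, filling the hole with the last one.
  const excise = () => {
    const i = Math.floor(Math.random() * window.length);
    const chosen = window[i];
    window[i] = window[window.length - 1];
    window.pop();
    return chosen;
  };
  for (const item of items) {
    window.push(item);
    if (window.length >= windowSize) yield excise();
  }
  while (window.length > 0) yield excise();  // drain once upstream ends
}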

@@ -951,2 +1386,3 @@ });

case 0:
// TODO(soergel): consider performance
if (!this.upstreamExhausted) {

@@ -957,5 +1393,5 @@ this.refill();

case 1:
if (!!this.buffer.isEmpty()) return [3, 3];
if (!!this.buffer.isEmpty()) return [3 /*break*/, 3];
chosenIndex = this.chooseIndex();
return [4, this.buffer.shuffleExcise(chosenIndex)];
return [4 /*yield*/, this.buffer.shuffleExcise(chosenIndex)];
case 2:

@@ -968,6 +1404,6 @@ result = _a.sent();

this.refill();
return [2, result];
return [2 /*return*/, result];
}
return [3, 1];
case 3: return [2, { value: null, done: true }];
return [3 /*break*/, 1];
case 3: return [2 /*return*/, { value: null, done: true }];
}

@@ -974,0 +1410,0 @@ });

@@ -0,4 +1,39 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { LazyIterator } from './lazy_iterator';
export declare abstract class StringIterator extends LazyIterator<string> {
/**
* Splits a string stream on a given separator.
*
* It is assumed that the incoming chunk boundaries have no semantic meaning,
* so conceptually the incoming stream is treated simply as the concatenation
* of its elements.
*
* The outgoing stream provides chunks corresponding to the results of the
* standard string split() operation (even if such a chunk spanned incoming
* chunks). The separators are not included.
*
* A typical usage is to split a text file (represented as a stream with
* arbitrary chunk boundaries) into lines.
*
* @param upstream A readable stream of strings that can be treated as
* concatenated.
* @param separator A character to split on.
*/
split(separator: string): StringIterator;
}
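// A minimal sketch (not package code) of the carryover logic behind split(),
// shown on a plain array of string chunks: a partial line at the end of each
// chunk is prepended to the next one, so separators may fall anywhere
// relative to chunk boundaries.
function splitChunks(chunks: string[], separator: string): string[] {
  const out: string[] = [];
  let carryover = '';
  for (const chunk of chunks) {
    const lines = (carryover + chunk).split(separator);
    // The last piece may be incomplete; hold it back for the next chunk.
    carryover = lines.pop()!;
    out.push(...lines);
  }
  if (carryover !== '') {
    out.push(carryover);  // emit the final partial line, if any
  }
  return out;
}
// splitChunks(['ab\ncd', 'e\nf'], '\n') -> ['ab', 'cde', 'f']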
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __extends = (this && this.__extends) || (function () {

@@ -49,3 +66,3 @@ var extendStatics = Object.setPrototypeOf ||

var lazy_iterator_1 = require("./lazy_iterator");
var StringIterator = (function (_super) {
var StringIterator = /** @class */ (function (_super) {
__extends(StringIterator, _super);

@@ -55,2 +72,20 @@ function StringIterator() {

}
/**
* Splits a string stream on a given separator.
*
* It is assumed that the incoming chunk boundaries have no semantic meaning,
* so conceptually the incoming stream is treated simply as the concatenation
* of its elements.
*
* The outgoing stream provides chunks corresponding to the results of the
* standard string split() operation (even if such a chunk spanned incoming
* chunks). The separators are not included.
*
* A typical usage is to split a text file (represented as a stream with
* arbitrary chunk boundaries) into lines.
*
* @param upstream A readable stream of strings that can be treated as
* concatenated.
* @param separator A character to split on.
*/
StringIterator.prototype.split = function (separator) {

@@ -62,3 +97,12 @@ return new SplitIterator(this, separator);

exports.StringIterator = StringIterator;
var SplitIterator = (function (_super) {
// ============================================================================
// The following private classes serve to implement the chainable methods
// on StringIterator. Unfortunately they can't be placed in separate files, due
// to resulting trouble with circular imports.
// ============================================================================
// We wanted multiple inheritance, e.g.
// class SplitIterator extends QueueIterator<string>, StringIterator
// but the TypeScript mixin approach is a bit hacky, so we take this adapter
// approach instead.
var SplitIterator = /** @class */ (function (_super) {
__extends(SplitIterator, _super);

@@ -77,3 +121,3 @@ function SplitIterator(upstream, separator) {

return __generator(this, function (_a) {
return [2, this.impl.next()];
return [2 /*return*/, this.impl.next()];
});

@@ -84,3 +128,3 @@ });

}(StringIterator));
var SplitIteratorImpl = (function (_super) {
var SplitIteratorImpl = /** @class */ (function (_super) {
__extends(SplitIteratorImpl, _super);

@@ -91,2 +135,3 @@ function SplitIteratorImpl(upstream, separator) {

_this.separator = separator;
// A partial string at the end of an upstream chunk
_this.carryover = '';

@@ -103,3 +148,3 @@ return _this;

switch (_b.label) {
case 0: return [4, this.upstream.next()];
case 0: return [4 /*yield*/, this.upstream.next()];
case 1:

@@ -109,9 +154,14 @@ chunkResult = _b.sent();

if (this.carryover === '') {
return [2, false];
return [2 /*return*/, false];
}
// Pretend that the pump succeeded in order to emit the small last batch.
// The next pump() call will actually fail.
this.outputQueue.push(this.carryover);
this.carryover = '';
return [2, true];
return [2 /*return*/, true];
}
lines = chunkResult.value.split(this.separator);
// Note the behavior: " ab ".split(' ') === ['', 'ab', '']
// Thus the carryover may be '' if the separator falls on a chunk
// boundary; this produces the correct result.
lines[0] = this.carryover + lines[0];

@@ -123,3 +173,3 @@ for (_i = 0, _a = lines.slice(0, -1); _i < _a.length; _i++) {

this.carryover = lines[lines.length - 1];
return [2, true];
return [2 /*return*/, true];
}

@@ -126,0 +176,0 @@ });

@@ -0,2 +1,26 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { FileChunkIterator, FileChunkIteratorOptions } from './file_chunk_iterator';
/**
* Provide a stream of chunks from a URL.
*
 * Note this function first downloads the entire file into memory before providing
* the first element from the stream. This is because the Fetch API does not
* yet reliably provide a reader stream for the response body.
*/
export declare function urlChunkIterator(url: RequestInfo, options?: FileChunkIteratorOptions): Promise<FileChunkIterator>;
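// A minimal usage sketch (not package code; the URL is hypothetical, the
// import path is relative to this file, and the {chunksize} option name
// follows the doc comments elsewhere in this package):
import {urlChunkIterator} from './url_chunk_iterator';

async function firstChunk(url: string): Promise<Uint8Array | null> {
  const chunks = await urlChunkIterator(url, {chunksize: 1024});
  const result = await chunks.next();  // chunks arrive as Uint8Arrays
  return result.done ? null : result.value;
}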

46

dist/iterators/url_chunk_iterator.js
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {

@@ -40,2 +57,9 @@ return new (P || (P = Promise))(function (resolve, reject) {

var file_chunk_iterator_1 = require("./file_chunk_iterator");
/**
* Provide a stream of chunks from a URL.
*
 * Note this function first downloads the entire file into memory before providing
* the first element from the stream. This is because the Fetch API does not
* yet reliably provide a reader stream for the response body.
*/
function urlChunkIterator(url, options) {

@@ -48,13 +72,13 @@ if (options === void 0) { options = {}; }

case 0:
if (!tfjs_core_1.ENV.get('IS_BROWSER')) return [3, 5];
return [4, fetch(url)];
if (!tfjs_core_1.ENV.get('IS_BROWSER')) return [3 /*break*/, 5];
return [4 /*yield*/, fetch(url)];
case 1:
response = _a.sent();
if (!response.ok) return [3, 3];
return [4, response.blob()];
if (!response.ok) return [3 /*break*/, 3];
return [4 /*yield*/, response.blob()];
case 2:
blob = _a.sent();
return [2, new file_chunk_iterator_1.FileChunkIterator(blob, options)];
return [2 /*return*/, new file_chunk_iterator_1.FileChunkIterator(blob, options)];
case 3: throw new Error(response.statusText);
case 4: return [3, 9];
case 4: return [3 /*break*/, 9];
case 5:

@@ -66,12 +90,12 @@ nodeFetch = require('node-fetch');

}
return [4, nodeFetch(url)];
return [4 /*yield*/, nodeFetch(url)];
case 6:
response = _a.sent();
if (!response.ok) return [3, 8];
return [4, response.buffer()];
if (!response.ok) return [3 /*break*/, 8];
return [4 /*yield*/, response.buffer()];
case 7:
unitArray = _a.sent();
return [2, new file_chunk_iterator_1.FileChunkIterator(unitArray, options)];
return [2 /*return*/, new file_chunk_iterator_1.FileChunkIterator(unitArray, options)];
case 8: throw new Error(response.statusText);
case 9: return [2];
case 9: return [2 /*return*/];
}

@@ -78,0 +102,0 @@ });

@@ -0,6 +1,175 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { Dataset } from './dataset';
import { CSVDataset } from './datasets/csv_dataset';
import { CSVConfig, DataElement } from './types';
/**
 * Create a `CSVDataset` by reading and decoding CSV file(s) from a provided
 * URL, or from a local path when running in the Node environment.
*
* ```js
* const csvUrl =
* 'https://storage.googleapis.com/tfjs-examples/multivariate-linear-regression/data/boston-housing-train.csv';
*
* async function run() {
* // We want to predict the column "medv", which represents a median value of
* // a home (in $1000s), so we mark it as a label.
* const csvDataset = tf.data.csv(
* csvUrl, {
* columnConfigs: {
* medv: {
* isLabel: true
* }
* }
* });
*
* // Number of features is the number of column names minus one for the label
* // column.
* const numOfFeatures = (await csvDataset.columnNames()).length - 1;
*
* // Prepare the Dataset for training.
* const flattenedDataset =
* csvDataset
* .map(([rawFeatures, rawLabel]) =>
* // Convert rows from object form (keyed by column name) to array form.
* [Object.values(rawFeatures), Object.values(rawLabel)])
* .batch(10);
*
* // Define the model.
* const model = tf.sequential();
* model.add(tf.layers.dense({
* inputShape: [numOfFeatures],
* units: 1
* }));
* model.compile({
* optimizer: tf.train.sgd(0.000001),
* loss: 'meanSquaredError'
* });
*
* // Fit the model using the prepared Dataset
* return model.fitDataset(flattenedDataset, {
* epochs: 10,
* callbacks: {
* onEpochEnd: async (epoch, logs) => {
* console.log(epoch + ':' + logs.loss);
* }
* }
* });
* }
*
* await run();
* ```
*
* @param source URL or local path to get CSV file. If it's a local path, it
 *     must have the prefix `file://`, and it only works in the Node environment.
* @param csvConfig (Optional) A CSVConfig object that contains configurations
* of reading and decoding from CSV file(s).
*/
/**
* @doc {
* heading: 'Data',
* subheading: 'Creation',
* namespace: 'data',
* configParamIndices: [1]
* }
*/
export declare function csv(source: string, csvConfig?: CSVConfig): CSVDataset;
/**
* Create a `Dataset` that produces each element by calling a provided function.
*
* Note that repeated iterations over this `Dataset` may produce different
* results, because the function will be called anew for each element of each
* iteration.
*
* Also, beware that the sequence of calls to this function may be out of order
* in time with respect to the logical order of the Dataset. This is due to the
* asynchronous lazy nature of stream processing, and depends on downstream
* transformations (e.g. .shuffle()). If the provided function is pure, this is
* no problem, but if it is a closure over a mutable state (e.g., a traversal
* pointer), then the order of the produced elements may be scrambled.
*
* ```js
* let i = -1;
* const func = () =>
* ++i < 5 ? {value: i, done: false} : {value: null, done: true};
 * const ds = tf.data.func(func);
* await ds.forEach(e => console.log(e));
* ```
*
* @param f A function that produces one data element on each call.
*/
export declare function func<T extends DataElement>(f: () => IteratorResult<T> | Promise<IteratorResult<T>>): Dataset<T>;
/**
 * Create a `Dataset` that produces each element from a provided JavaScript
 * generator, which is a function* (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators#Generator_functions),
 * or a function that returns an iterator (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators#Generator_functions).
*
 * The returned iterator should have a `.next()` function that returns elements
 * in the format `{value: DataElement, done: boolean}`.
*
* Example of creating a dataset from an iterator factory:
* ```js
* function makeIterator() {
* const numElements = 10;
* let index = 0;
*
* const iterator = {
* next: () => {
* let result;
* if (index < numElements) {
* result = {value: index, done: false};
* index++;
* return result;
* }
* return {value: index, done: true};
* }
* };
* return iterator;
* }
* const ds = tfd.generator(makeIterator);
* ds.forEach(e => console.log(e));
* ```
*
* Example of creating a dataset from a generator:
* ```js
* function* dataGenerator() {
* const numElements = 10;
* let index = 0;
* while (index < numElements) {
* const x = index;
* index++;
* yield x;
* }
* }
*
* const ds = tfd.generator(dataGenerator);
* ds.forEach(e => console.log(e));
* ```
*
 * @param generator A JavaScript generator function that returns a JavaScript
* iterator.
*/
/**
* @doc {
* heading: 'Data',
* subheading: 'Creation',
* namespace: 'data',
* configParamIndices: [1]
* }
*/
export declare function generator<T extends DataElement>(generator: () => (Iterator<T> | Promise<Iterator<T>>)): Dataset<T>;
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {

@@ -42,2 +59,72 @@ return new (P || (P = Promise))(function (resolve, reject) {

var url_data_source_1 = require("./sources/url_data_source");
/**
 * Create a `CSVDataset` by reading and decoding CSV file(s) from a provided
 * URL, or from a local path when running in the Node environment.
*
* ```js
* const csvUrl =
* 'https://storage.googleapis.com/tfjs-examples/multivariate-linear-regression/data/boston-housing-train.csv';
*
* async function run() {
* // We want to predict the column "medv", which represents a median value of
* // a home (in $1000s), so we mark it as a label.
* const csvDataset = tf.data.csv(
* csvUrl, {
* columnConfigs: {
* medv: {
* isLabel: true
* }
* }
* });
*
* // Number of features is the number of column names minus one for the label
* // column.
* const numOfFeatures = (await csvDataset.columnNames()).length - 1;
*
* // Prepare the Dataset for training.
* const flattenedDataset =
* csvDataset
* .map(([rawFeatures, rawLabel]) =>
* // Convert rows from object form (keyed by column name) to array form.
* [Object.values(rawFeatures), Object.values(rawLabel)])
* .batch(10);
*
* // Define the model.
* const model = tf.sequential();
* model.add(tf.layers.dense({
* inputShape: [numOfFeatures],
* units: 1
* }));
* model.compile({
* optimizer: tf.train.sgd(0.000001),
* loss: 'meanSquaredError'
* });
*
* // Fit the model using the prepared Dataset
* return model.fitDataset(flattenedDataset, {
* epochs: 10,
* callbacks: {
* onEpochEnd: async (epoch, logs) => {
* console.log(epoch + ':' + logs.loss);
* }
* }
* });
* }
*
* await run();
* ```
*
* @param source URL or local path to get CSV file. If it's a local path, it
 *     must have the prefix `file://`, and it only works in the Node environment.
* @param csvConfig (Optional) A CSVConfig object that contains configurations
* of reading and decoding from CSV file(s).
*/
/**
* @doc {
* heading: 'Data',
* subheading: 'Creation',
* namespace: 'data',
* configParamIndices: [1]
* }
*/
function csv(source, csvConfig) {

@@ -48,2 +135,26 @@ if (csvConfig === void 0) { csvConfig = {}; }

exports.csv = csv;
/**
* Create a `Dataset` that produces each element by calling a provided function.
*
* Note that repeated iterations over this `Dataset` may produce different
* results, because the function will be called anew for each element of each
* iteration.
*
* Also, beware that the sequence of calls to this function may be out of order
* in time with respect to the logical order of the Dataset. This is due to the
* asynchronous lazy nature of stream processing, and depends on downstream
* transformations (e.g. .shuffle()). If the provided function is pure, this is
* no problem, but if it is a closure over a mutable state (e.g., a traversal
* pointer), then the order of the produced elements may be scrambled.
*
* ```js
* let i = -1;
* const func = () =>
* ++i < 5 ? {value: i, done: false} : {value: null, done: true};
 * const ds = tf.data.func(func);
* await ds.forEach(e => console.log(e));
* ```
*
* @param f A function that produces one data element on each call.
*/
function func(f) {

@@ -53,6 +164,64 @@ var _this = this;

return dataset_1.datasetFromIteratorFn(function () { return __awaiter(_this, void 0, void 0, function () { return __generator(this, function (_a) {
return [2, iter];
return [2 /*return*/, iter];
}); }); });
}
exports.func = func;
/**
 * Create a `Dataset` that produces each element from a provided JavaScript
 * generator, which is a function* (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators#Generator_functions),
 * or a function that returns an iterator (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Iterators_and_Generators#Generator_functions).
*
 * The returned iterator should have a `.next()` function that returns elements
 * in the format `{value: DataElement, done: boolean}`.
*
* Example of creating a dataset from an iterator factory:
* ```js
* function makeIterator() {
* const numElements = 10;
* let index = 0;
*
* const iterator = {
* next: () => {
* let result;
* if (index < numElements) {
* result = {value: index, done: false};
* index++;
* return result;
* }
* return {value: index, done: true};
* }
* };
* return iterator;
* }
* const ds = tfd.generator(makeIterator);
* ds.forEach(e => console.log(e));
* ```
*
* Example of creating a dataset from a generator:
* ```js
* function* dataGenerator() {
* const numElements = 10;
* let index = 0;
* while (index < numElements) {
* const x = index;
* index++;
* yield x;
* }
* }
*
* const ds = tfd.generator(dataGenerator);
* ds.forEach(e => console.log(e));
* ```
*
 * @param generator A JavaScript generator function that returns a JavaScript
* iterator.
*/
/**
* @doc {
* heading: 'Data',
* subheading: 'Creation',
* namespace: 'data',
* configParamIndices: [1]
* }
*/
function generator(generator) {

@@ -64,4 +233,4 @@ var _this = this;

_a = lazy_iterator_1.iteratorFromFunction;
return [4, generator()];
case 1: return [2, _a.apply(void 0, [(_b.sent()).next])];
return [4 /*yield*/, generator()];
case 1: return [2 /*return*/, _a.apply(void 0, [(_b.sent()).next])];
}

@@ -68,0 +237,0 @@ }); }); });

@@ -0,1 +1,18 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { DataSource } from '../datasource';

@@ -5,7 +22,19 @@ import { ByteChunkIterator } from '../iterators/byte_chunk_iterator';

import { FileElement } from '../types';
/**
* Represents a file, blob, or Uint8Array readable as a stream of binary data
* chunks.
*/
export declare class FileDataSource extends DataSource {
protected input: FileElement | string;
protected readonly options: FileChunkIteratorOptions;
/**
* Create a `FileDataSource`.
*
* @param input Local file path, or `File`/`Blob`/`Uint8Array` object to
 *     read. Local file paths only work in the Node environment.
* @param options Options passed to the underlying `FileChunkIterator`s,
* such as {chunksize: 1024}.
*/
constructor(input: FileElement | string, options?: FileChunkIteratorOptions);
iterator(): Promise<ByteChunkIterator>;
}
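// A minimal usage sketch (not package code; the import path and the
// {chunksize} option name follow the doc comment above): streaming an
// in-memory Uint8Array through a FileDataSource and counting its bytes.
import {FileDataSource} from './file_data_source';

async function totalBytes(bytes: Uint8Array): Promise<number> {
  const source = new FileDataSource(bytes, {chunksize: 1024});
  const it = await source.iterator();
  let total = 0;
  for (let r = await it.next(); !r.done; r = await it.next()) {
    total += r.value.length;  // each chunk is a Uint8Array
  }
  return total;
}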
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __extends = (this && this.__extends) || (function () {

@@ -52,4 +69,16 @@ var extendStatics = Object.setPrototypeOf ||

var source_util_1 = require("../util/source_util");
var FileDataSource = (function (_super) {
/**
* Represents a file, blob, or Uint8Array readable as a stream of binary data
* chunks.
*/
var FileDataSource = /** @class */ (function (_super) {
__extends(FileDataSource, _super);
/**
* Create a `FileDataSource`.
*
* @param input Local file path, or `File`/`Blob`/`Uint8Array` object to
 *     read. Local file paths only work in the Node environment.
* @param options Options passed to the underlying `FileChunkIterator`s,
* such as {chunksize: 1024}.
*/
function FileDataSource(input, options) {

@@ -70,3 +99,5 @@ if (options === void 0) { options = {}; }

}
return [2, new file_chunk_iterator_1.FileChunkIterator(this.input, this.options)];
// TODO(kangyizhang): Add LocalFileChunkIterator to split local streaming
// with file in browser.
return [2 /*return*/, new file_chunk_iterator_1.FileChunkIterator(this.input, this.options)];
});

@@ -73,0 +104,0 @@ });

@@ -0,1 +1,18 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { DataSource } from '../datasource';

@@ -7,4 +24,11 @@ import { ByteChunkIterator } from '../iterators/byte_chunk_iterator';

protected readonly fileOptions: FileChunkIteratorOptions;
/**
* Create a `URLDataSource`.
*
* @param url A source URL string, or a `Request` object.
* @param options Options passed to the underlying `FileChunkIterator`s,
* such as {chunksize: 1024}.
*/
constructor(url: RequestInfo, fileOptions?: FileChunkIteratorOptions);
iterator(): Promise<ByteChunkIterator>;
}
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __extends = (this && this.__extends) || (function () {

@@ -52,4 +69,14 @@ var extendStatics = Object.setPrototypeOf ||

var file_data_source_1 = require("./file_data_source");
var URLDataSource = (function (_super) {
/*
* Represents a URL readable as a stream of binary data chunks.
*/
var URLDataSource = /** @class */ (function (_super) {
__extends(URLDataSource, _super);
/**
* Create a `URLDataSource`.
*
* @param url A source URL string, or a `Request` object.
* @param options Options passed to the underlying `FileChunkIterator`s,
* such as {chunksize: 1024}.
*/
function URLDataSource(url, fileOptions) {

@@ -62,2 +89,6 @@ if (fileOptions === void 0) { fileOptions = {}; }

}
// TODO(soergel): provide appropriate caching options. Currently this
// will download the URL anew for each call to iterator(). Since we have
// to treat the downloaded file as a blob/buffer anyway, we may as well retain
// it, but that raises GC issues. Also we may want a persistent disk cache.
URLDataSource.prototype.iterator = function () {

@@ -67,9 +98,9 @@ return __awaiter(this, void 0, void 0, function () {

if (source_util_1.isLocalPath(this.url)) {
return [2, (new file_data_source_1.FileDataSource(this.url, this.fileOptions))
return [2 /*return*/, (new file_data_source_1.FileDataSource(this.url, this.fileOptions))
.iterator()];
}
else {
return [2, url_chunk_iterator_1.urlChunkIterator(this.url, this.fileOptions)];
return [2 /*return*/, url_chunk_iterator_1.urlChunkIterator(this.url, this.fileOptions)];
}
return [2];
return [2 /*return*/];
});

@@ -76,0 +107,0 @@ });

@@ -0,7 +1,34 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import * as tf from '@tensorflow/tfjs-core';
import { Dataset } from './dataset';
/**
* The value associated with a given key for a single element.
*
 * Such a value must not have a batch dimension. A value may be a scalar or an
* n-dimensional array.
*/
export declare type ElementArray = number | number[] | tf.Tensor | string;
/**
* A map from string keys (aka column names) to values for a single element.
*/
export declare type TabularRecord = {
[key: string]: ElementArray;
};
/** An interface representing numeric statistics of a column. */
export interface NumericColumnStatistics {

@@ -15,6 +42,29 @@ min: number;

}
/**
 * An interface representing column-level NumericColumnStatistics for a
* Dataset.
*/
export interface DatasetStatistics {
[key: string]: NumericColumnStatistics;
}
/**
* Provides a function that scales numeric values into the [0, 1] interval.
*
* @param min the lower bound of the inputs, which should be mapped to 0.
 * @param max the upper bound of the inputs, which should be mapped to 1.
* @return A function that maps an input ElementArray to a scaled ElementArray.
*/
export declare function scaleTo01(min: number, max: number): (value: ElementArray) => ElementArray;
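// A minimal usage sketch (not package code): scaleTo01 returns the affine map
// value -> (value - min) / (max - min), applied elementwise to arrays and
// Tensors. Assumes the declaration above is in scope.
const scale = scaleTo01(0, 10);
scale(5);             // -> 0.5
scale([0, 2.5, 10]);  // -> [0, 0.25, 1]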
/**
 * Provides a function that calculates column-level statistics, i.e., min, max,
* variance, stddev.
*
* @param dataset The Dataset object whose statistics will be calculated.
* @param sampleSize (Optional) If set, statistics will only be calculated
* against a subset of the whole data.
 * @param shuffleWindowSize (Optional) If set, shuffle the provided dataset
 *   before calculating statistics.
* @return A DatasetStatistics object that contains NumericColumnStatistics of
* each column.
*/
export declare function computeDatasetStatistics(dataset: Dataset<TabularRecord>, sampleSize?: number, shuffleWindowSize?: number): Promise<DatasetStatistics>;
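// A minimal usage sketch (not package code; the import paths are assumptions,
// and the field names echo the statistics listed in the doc above):
import {Dataset} from './dataset';
import {computeDatasetStatistics, TabularRecord} from './statistics';

async function summarize(dataset: Dataset<TabularRecord>) {
  // Sample 1000 elements through a shuffle window of 100 before aggregating.
  const stats = await computeDatasetStatistics(dataset, 1000, 100);
  for (const column of Object.keys(stats)) {
    const s = stats[column];
    console.log(column, s.min, s.max, s.variance, s.stddev);
  }
}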
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {

@@ -39,2 +56,9 @@ return new (P || (P = Promise))(function (resolve, reject) {

var tf = require("@tensorflow/tfjs-core");
/**
* Provides a function that scales numeric values into the [0, 1] interval.
*
* @param min the lower bound of the inputs, which should be mapped to 0.
 * @param max the upper bound of the inputs, which should be mapped to 1.
* @return A function that maps an input ElementArray to a scaled ElementArray.
*/
function scaleTo01(min, max) {

@@ -63,2 +87,14 @@ var range = max - min;

exports.scaleTo01 = scaleTo01;
/**
 * Provides a function that calculates column-level statistics, i.e., min, max,
* variance, stddev.
*
* @param dataset The Dataset object whose statistics will be calculated.
* @param sampleSize (Optional) If set, statistics will only be calculated
* against a subset of the whole data.
 * @param shuffleWindowSize (Optional) If set, shuffle the provided dataset
 *   before calculating statistics.
* @return A DatasetStatistics object that contains NumericColumnStatistics of
* each column.
*/
function computeDatasetStatistics(dataset, sampleSize, shuffleWindowSize) {

@@ -71,2 +107,3 @@ return __awaiter(this, void 0, void 0, function () {

sampleDataset = dataset;
// TODO(soergel): allow for deep shuffle where possible.
if (shuffleWindowSize != null) {

@@ -79,3 +116,3 @@ sampleDataset = sampleDataset.shuffle(shuffleWindowSize);

result = {};
return [4, sampleDataset.forEach(function (e) {
return [4 /*yield*/, sampleDataset.forEach(function (e) {
for (var _i = 0, _a = Object.keys(e); _i < _a.length; _i++) {

@@ -85,2 +122,3 @@ var key = _a[_i];

if (typeof (value) === 'string') {
// No statistics for string element.
}

@@ -110,2 +148,4 @@ else {

var recordMax = void 0;
// Calculate accumulated mean and variance following tf.Transform
// implementation
var valueLength = 0;

@@ -121,4 +161,4 @@ var valueMean = 0;

var valueMoment = tf.moments(value);
valueMean = valueMoment.mean.get();
valueVariance = valueMoment.variance.get();
valueMean = valueMoment.mean.dataSync()[0];
valueVariance = valueMoment.variance.dataSync()[0];
valueLength = value.size;

@@ -130,4 +170,4 @@ }

var valueMoment = tf.moments(value);
valueMean = valueMoment.mean.get();
valueVariance = valueMoment.variance.get();
valueMean = valueMoment.mean.dataSync()[0];
valueVariance = valueMoment.variance.dataSync()[0];
valueLength = value.length;
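// A minimal sketch (not package code) of the standard rule for merging two
// running (count, mean, variance) summaries; this is the kind of chunk-by-
// chunk accumulation the comment above refers to, though the package's
// tf.Transform-style bookkeeping may differ in detail.
function mergeStats(a: {count: number, mean: number, variance: number},
                    b: {count: number, mean: number, variance: number}) {
  const count = a.count + b.count;
  const delta = b.mean - a.mean;
  const mean = a.mean + delta * (b.count / count);
  // Combine via sums of squared deviations: M2 = variance * count.
  const m2 = a.variance * a.count + b.variance * b.count +
             delta * delta * (a.count * b.count) / count;
  return {count, mean, variance: m2 / count};
}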

@@ -165,2 +205,3 @@ }

_a.sent();
// Variance and stddev should be NaN for the case of a single element.
for (key in result) {

@@ -173,3 +214,3 @@ stat = result[key];

}
return [2, result];
return [2 /*return*/, result];
}

@@ -176,0 +217,0 @@ });

@@ -0,1 +1,17 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
export {};
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/
Object.defineProperty(exports, "__esModule", { value: true });
var jasmine_util_1 = require("@tensorflow/tfjs-core/dist/jasmine_util");
var backend_cpu_1 = require("@tensorflow/tfjs-core/dist/kernels/backend_cpu");
// tslint:disable-next-line:no-require-imports
var jasmine = require('jasmine');

@@ -6,0 +23,0 @@ process.on('unhandledRejection', function (e) { throw e; });

@@ -0,1 +1,18 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { DataType } from '@tensorflow/tfjs-core';

@@ -5,2 +22,5 @@ import { TensorContainer, TensorContainerArray, TensorContainerObject } from '@tensorflow/tfjs-core/dist/tensor_types';

import { LazyIterator } from './iterators/lazy_iterator';
/**
* JSON-like type representing a nested structure of primitives or Tensors.
*/
export declare type DataElement = TensorContainer;

@@ -16,5 +36,31 @@ export declare type DataElementObject = TensorContainerObject;

}
/**
* A nested structure of Datasets, used as the input to zip().
*/
export declare type DatasetContainer = Container<Dataset<DataElement>>;
/**
* A nested structure of LazyIterators, used as the input to zip().
*/
export declare type IteratorContainer = Container<LazyIterator<DataElement>>;
/**
* Types supported by FileChunkIterator in both Browser and Node Environment.
*/
export declare type FileElement = File | Blob | Uint8Array;
/**
 * A dictionary containing column-level configurations when reading and
 * decoding CSV file(s) from a CSV source.
 * Has the following fields:
 * - `required` Whether a value in this column is required. If set to `true`,
 *   an error is thrown when an empty value is found.
*
* - `dtype` Data type of this column. Could be int32, float32, bool, or string.
*
* - `default` Default value of this column.
*
 * - `isLabel` Whether this column is a label instead of a feature. If isLabel
 *   is `true` for at least one column, the .csv() API will return an array of
 *   two items: the first is a dict of feature key/value pairs, the second is
 *   a dict of label key/value pairs. If no column is marked as a label, a
 *   dict of features only is returned.
*/
export interface ColumnConfig {

@@ -26,10 +72,56 @@ required?: boolean;

}
/**
 * Interface for configuring a dataset when reading and decoding from CSV file(s).
*/
export interface CSVConfig {
/**
 * A boolean value that indicates whether the first row of the provided CSV file
* is a header line with column names, and should not be included in the data.
*/
hasHeader?: boolean;
/**
 * A list of strings that corresponds to the CSV column names, in order. If
 * provided, it overrides the column names inferred from the header row. If not
 * provided, the column names are inferred from the first row of the records.
 * If `hasHeader` is false and `columnNames` is not provided, this method will
 * throw an error.
*/
columnNames?: string[];
/**
 * A dictionary whose keys are column names and whose values are objects
 * stating whether the column is required, its data type, its default value,
 * and whether it is a label. If provided, keys must correspond to names
 * provided in `columnNames` or inferred from the file header lines. If any
 * column is marked as a label, the .csv() API will return an array of two
 * items: the first is a dict of feature key/value pairs, the second is a
 * dict of label key/value pairs. If no column is marked as a label, a dict
 * of features only is returned.
*
* Has the following fields:
 * - `required` Whether a value in this column is required. If set to `true`,
 *   an error is thrown when an empty value is found.
*
* - `dtype` Data type of this column. Could be int32, float32, bool, or
* string.
*
* - `default` Default value of this column.
*
 * - `isLabel` Whether this column is a label instead of a feature. If isLabel
 *   is `true` for at least one column, the .csv() API will return an array of
 *   two items: the first is a dict of feature key/value pairs, the second is
 *   a dict of label key/value pairs. If no column is marked as a label, a
 *   dict of features only is returned.
*/
columnConfigs?: {
[key: string]: ColumnConfig;
};
/**
* If true, only columns provided in `columnConfigs` will be parsed and
* provided during iteration.
*/
configuredColumnsOnly?: boolean;
/**
 * The string used to separate fields when parsing each line of the input file.
*/
delimiter?: string;
}
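// A minimal example (not package code; the column names are hypothetical and
// the dtype values follow the doc above) of a CSVConfig using these fields:
const csvConfig: CSVConfig = {
  hasHeader: true,
  columnConfigs: {
    medv: {isLabel: true},                     // mark one column as the label
    crim: {required: true, dtype: 'float32'},  // int32|float32|bool|string
  },
  configuredColumnsOnly: false,
  delimiter: ',',
};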
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
Object.defineProperty(exports, "__esModule", { value: true });
//# sourceMappingURL=types.js.map

@@ -0,1 +1,24 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
/**
* A return value for a mapping function that can be applied via deepMap.
*
* If recurse is true, the value should be empty, and iteration will continue
* into the object or array.
*/
export declare type DeepMapResult = {

@@ -5,5 +28,50 @@ value: any;

};
/**
* Apply a mapping function to a nested structure in a recursive manner.
*
* The result of the mapping is an object with the same nested structure (i.e.,
* of arrays and dicts) as the input, except that some subtrees are replaced,
* according to the results of the mapping function.
*
* Mappings are memoized. Thus, if the nested structure contains the same
* object in multiple positions, the output will contain the same mapped object
* in those positions. Cycles are not supported, however.
*
* @param input: The object to which to apply the mapping function.
* @param mapFn: A function that expects a single node of the object tree, and
* returns a `DeepMapResult`. The `DeepMapResult` either provides a
* replacement value for that node (i.e., replacing the subtree), or indicates
* that the node should be processed recursively.
*/
export declare function deepMap(input: any, mapFn: (x: any) => DeepMapResult): any | any[];
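// A simplified sketch (not package code) of the deepMap contract above: a
// memoized recursive map over arrays and plain objects. Cycle detection and
// Tensor handling are omitted for brevity.
function deepMapSketch(input: any,
                       mapFn: (x: any) => {value: any, recurse: boolean},
                       seen = new Map<any, any>()): any {
  const result = mapFn(input);
  if (!result.recurse) return result.value;     // subtree replaced outright
  if (seen.has(input)) return seen.get(input);  // reuse mapped shared subtrees
  const mapped: any = Array.isArray(input) ? [] : {};
  seen.set(input, mapped);
  for (const k of Object.keys(input)) {
    mapped[k] = deepMapSketch(input[k], mapFn, seen);
  }
  return mapped;
}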
/**
* Zip nested structures together in a recursive manner.
*
* This has the effect of transposing or pivoting data, e.g. converting it from
* a row-major representation to a column-major representation.
*
* For example, `deepZip([{a: 1, b: 2}, {a: 3, b: 4}])` returns
* `{a: [1, 3], b: [2, 4]}`.
*
* The inputs should all have the same nested structure (i.e., of arrays and
* dicts). The result is a single object with the same nested structure, where
* the leaves are arrays collecting the values of the inputs at that location
* (or, optionally, the result of a custom function applied to those arrays).
*
* @param inputs: An array of the objects to zip together.
* @param zipFn: (optional) A function that expects an array of elements at a
* single node of the object tree, and returns a `DeepMapResult`. The
* `DeepMapResult` either provides a result value for that node (i.e.,
* representing the subtree), or indicates that the node should be processed
* recursively. The default zipFn recurses as far as possible and places
* arrays at the leaves.
*/
export declare function deepZip(inputs: any[], zipFn?: (xs: any[]) => DeepMapResult): any | any[];
export declare function zipToList(x: any[]): DeepMapResult;
/**
* A return value for an async map function for use with deepMapAndAwaitAll.
*
* If recurse is true, the value should be empty, and iteration will continue
* into the object or array.
*/
export declare type DeepMapAsyncResult = {

@@ -13,4 +81,37 @@ value: Promise<any>;

};
/**
* Apply an async mapping function to a nested structure in a recursive manner.
*
* This first creates a nested structure of Promises, and then awaits all of
* those, resulting in a single Promise for a resolved nested structure.
*
* The result of the mapping is an object with the same nested structure (i.e.,
* of arrays and dicts) as the input, except that some subtrees are replaced,
* according to the results of the mapping function.
*
* Mappings are memoized. Thus, if the nested structure contains the same
* object in multiple positions, the output will contain the same mapped object
* in those positions. Cycles are not supported, however.
*
* @param input: The object to which to apply the mapping function.
* @param mapFn: A function that expects a single node of the object tree, and
* returns a `DeepMapAsyncResult`. The `DeepMapAsyncResult` either provides
* a `Promise` for a replacement value for that node (i.e., replacing the
* subtree), or indicates that the node should be processed recursively. Note
* that the decision whether or not to recurse must be made immediately; only
* the mapped value may be promised.
*/
export declare function deepMapAndAwaitAll(input: any, mapFn: (x: any) => DeepMapAsyncResult): Promise<any | any[]>;
/**
* Determine whether the argument is iterable.
*
* @returns true if the argument is an array or any non-Tensor object.
*/
export declare function isIterable(obj: any): boolean;
/**
* Determine whether the argument is an array of numbers.
*
* @returns true if the argument is an array and all of its children are
* numbers; false otherwise.
*/
export declare function isNumericArray(obj: any): boolean;
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {

@@ -39,2 +56,19 @@ return new (P || (P = Promise))(function (resolve, reject) {

var tf = require("@tensorflow/tfjs-core");
/**
* Apply a mapping function to a nested structure in a recursive manner.
*
* The result of the mapping is an object with the same nested structure (i.e.,
* of arrays and dicts) as the input, except that some subtrees are replaced,
* according to the results of the mapping function.
*
* Mappings are memoized. Thus, if the nested structure contains the same
* object in multiple positions, the output will contain the same mapped object
* in those positions. Cycles are not supported, however.
*
* @param input: The object to which to apply the mapping function.
* @param mapFn: A function that expects a single node of the object tree, and
* returns a `DeepMapResult`. The `DeepMapResult` either provides a
* replacement value for that node (i.e., replacing the subtree), or indicates
* that the node should be processed recursively.
*/
function deepMap(input, mapFn) {

@@ -44,2 +78,8 @@ return deepMapInternal(input, mapFn);

exports.deepMap = deepMap;
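To make the memoization guarantee concrete, a small sketch: the same input object appearing in two positions yields the identical mapped object in both.

```js
// Path is illustrative: these helpers live in the package internals.
const {deepMap} = require('@tensorflow/tfjs-data/dist/util/deep_map');

const shared = {x: 1};
const input = {first: shared, second: shared};

// Map each {x: n} leaf to a wrapper object; recurse everywhere else.
const mapFn = (node) => (node != null && typeof node.x === 'number') ?
    {value: {doubled: node.x * 2}, recurse: false} :
    {value: null, recurse: true};

const out = deepMap(input, mapFn);
console.log(out.first === out.second);  // true: the mapping was memoized
```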
/**
* @param seen: A Map of known object mappings (i.e., memoized results of
* `mapFn()`)
 * @param containedIn: A set containing objects on the reference path currently
* being processed (used to detect cycles).
*/
function deepMapInternal(input, mapFn, seen, containedIn) {

@@ -66,2 +106,3 @@ if (seen === void 0) { seen = new Map(); }

else if (isIterable(input)) {
// tslint:disable-next-line:no-any
var mappedIterable = Array.isArray(input) ? [] : {};

@@ -81,2 +122,26 @@ containedIn.add(input);

}
// TODO(soergel, kangyizhang) Reconsider naming of deepZip() to avoid confusion
// with zip()
/**
* Zip nested structures together in a recursive manner.
*
* This has the effect of transposing or pivoting data, e.g. converting it from
* a row-major representation to a column-major representation.
*
* For example, `deepZip([{a: 1, b: 2}, {a: 3, b: 4}])` returns
* `{a: [1, 3], b: [2, 4]}`.
*
* The inputs should all have the same nested structure (i.e., of arrays and
* dicts). The result is a single object with the same nested structure, where
* the leaves are arrays collecting the values of the inputs at that location
* (or, optionally, the result of a custom function applied to those arrays).
*
* @param inputs: An array of the objects to zip together.
* @param zipFn: (optional) A function that expects an array of elements at a
* single node of the object tree, and returns a `DeepMapResult`. The
* `DeepMapResult` either provides a result value for that node (i.e.,
* representing the subtree), or indicates that the node should be processed
* recursively. The default zipFn recurses as far as possible and places
* arrays at the leaves.
*/
function deepZip(inputs, zipFn) {

@@ -87,4 +152,10 @@ if (zipFn === void 0) { zipFn = zipToList; }

exports.deepZip = deepZip;
/**
 * @param containedIn: A set containing objects on the reference path currently
* being processed (used to detect cycles).
*/
function deepZipInternal(inputs, zipFn, containedIn) {
if (containedIn === void 0) { containedIn = new Set(); }
// The recursion follows the structure of input 0; it's assumed that all the
// other inputs have the same structure.
var input = inputs[0];

@@ -102,2 +173,3 @@ if (containedIn.has(input)) {

else if (isIterable(input)) {
// tslint:disable-next-line:no-any
var mappedIterable = Array.isArray(input) ? [] : {};

@@ -120,2 +192,3 @@ containedIn.add(input);

}
// tslint:disable-next-line:no-any
function zipToList(x) {

@@ -125,2 +198,3 @@ if (x === null) {

}
// TODO(soergel): validate array type?
if (isIterable(x[0])) {

@@ -134,2 +208,24 @@ return { value: null, recurse: true };

exports.zipToList = zipToList;
/**
* Apply an async mapping function to a nested structure in a recursive manner.
*
* This first creates a nested structure of Promises, and then awaits all of
* those, resulting in a single Promise for a resolved nested structure.
*
* The result of the mapping is an object with the same nested structure (i.e.,
* of arrays and dicts) as the input, except that some subtrees are replaced,
* according to the results of the mapping function.
*
* Mappings are memoized. Thus, if the nested structure contains the same
* object in multiple positions, the output will contain the same mapped object
* in those positions. Cycles are not supported, however.
*
* @param input: The object to which to apply the mapping function.
* @param mapFn: A function that expects a single node of the object tree, and
* returns a `DeepMapAsyncResult`. The `DeepMapAsyncResult` either provides
* a `Promise` for a replacement value for that node (i.e., replacing the
* subtree), or indicates that the node should be processed recursively. Note
* that the decision whether or not to recurse must be made immediately; only
* the mapped value may be promised.
*/
function deepMapAndAwaitAll(input, mapFn) {

@@ -142,2 +238,3 @@ return __awaiter(this, void 0, void 0, function () {

seen = new Map();
// First do a normal deepMap, collecting Promises in 'seen' as a side effect.
deepMapInternal(input, mapFn, seen);

@@ -147,7 +244,7 @@ _i = 0, _a = Array.from(seen.keys());

case 1:
if (!(_i < _a.length)) return [3, 4];
if (!(_i < _a.length)) return [3 /*break*/, 4];
key = _a[_i];
value = seen.get(key);
if (!(value instanceof Promise)) return [3, 3];
return [4, value];
if (!(value instanceof Promise)) return [3 /*break*/, 3];
return [4 /*yield*/, value];
case 2:

@@ -159,6 +256,6 @@ mappedValue = _b.sent();

_i++;
return [3, 1];
return [3 /*break*/, 1];
case 4:
result = deepMapInternal(input, mapFn, seen);
return [2, result];
return [2 /*return*/, result];
}

@@ -169,2 +266,8 @@ });

exports.deepMapAndAwaitAll = deepMapAndAwaitAll;
/**
* Determine whether the argument is iterable.
*
* @returns true if the argument is an array or any non-Tensor object.
*/
// tslint:disable-next-line:no-any
function isIterable(obj) {

@@ -176,2 +279,9 @@ return obj != null &&

exports.isIterable = isIterable;
/**
* Determine whether the argument is an array of numbers.
*
* @returns true if the argument is an array and all of its children are
* numbers; false otherwise.
*/
// tslint:disable-next-line:no-any
function isNumericArray(obj) {

@@ -178,0 +288,0 @@ if (obj == null) {

@@ -0,4 +1,24 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
import { RingBuffer } from './ring_buffer';
export declare class GrowingRingBuffer<T> extends RingBuffer<T> {
private static INITIAL_CAPACITY;
/**
* Constructs a `GrowingRingBuffer`.
*/
constructor();

@@ -8,3 +28,6 @@ isFull(): boolean;

unshift(value: T): void;
/**
* Doubles the capacity of the buffer.
*/
private expand;
}
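
A behavioral sketch (the import path is an assumption, and the never-full behavior is inferred from the `isFull` override in the declaration): pushing past the initial capacity should transparently double the storage instead of failing.

```js
// Path is illustrative: this class lives in the package internals.
const {GrowingRingBuffer} =
    require('@tensorflow/tfjs-data/dist/util/growing_ring_buffer');

const buf = new GrowingRingBuffer();
for (let i = 0; i < 100; i++) {
  buf.push(i);  // expands (doubling) as needed rather than throwing
}
console.log(buf.length());  // 100
console.log(buf.isFull());  // expected false: a growing buffer never fills
console.log(buf.shift());   // 0: FIFO order survives expansion
```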
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
var __extends = (this && this.__extends) || (function () {

@@ -14,4 +31,7 @@ var extendStatics = Object.setPrototypeOf ||

var ring_buffer_1 = require("./ring_buffer");
var GrowingRingBuffer = (function (_super) {
var GrowingRingBuffer = /** @class */ (function (_super) {
__extends(GrowingRingBuffer, _super);
/**
* Constructs a `GrowingRingBuffer`.
*/
function GrowingRingBuffer() {

@@ -35,2 +55,5 @@ return _super.call(this, GrowingRingBuffer.INITIAL_CAPACITY) || this;

};
/**
* Doubles the capacity of the buffer.
*/
GrowingRingBuffer.prototype.expand = function () {

@@ -40,2 +63,4 @@ var newCapacity = this.capacity * 2;

var len = this.length();
// Rotate the buffer to start at index 0 again, since we can't just
// allocate more space at the end.
for (var i = 0; i < len; i++) {

@@ -42,0 +67,0 @@ newData[i] = this.get(this.wrap(this.begin + i));

@@ -0,1 +1,21 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
/**
* A ring buffer, providing O(1) FIFO, LIFO, and related operations.
*/
export declare class RingBuffer<T> {

@@ -7,15 +27,59 @@ capacity: number;

protected data: T[];
/**
* Constructs a `RingBuffer`.
 * @param capacity The number of items that the buffer can accommodate.
*/
constructor(capacity: number);
/**
* Map any index into the range 0 <= index < 2*capacity.
*/
protected wrap(index: number): number;
protected get(index: number): T;
protected set(index: number, value: T): void;
/**
* Returns the current number of items in the buffer.
*/
length(): number;
/**
* Reports whether the buffer is full.
* @returns true if the number of items in the buffer equals its capacity, and
* false otherwise.
*/
isFull(): boolean;
/**
* Reports whether the buffer is empty.
* @returns true if the number of items in the buffer equals zero, and
* false otherwise.
*/
isEmpty(): boolean;
/**
* Adds an item to the end of the buffer.
*/
push(value: T): void;
/**
* Adds many items to the end of the buffer, in order.
*/
pushAll(values: T[]): void;
/**
* Removes and returns the last item in the buffer.
*/
pop(): T;
/**
* Adds an item to the beginning of the buffer.
*/
unshift(value: T): void;
/**
* Removes and returns the first item in the buffer.
*/
shift(): T;
/**
* Removes and returns a specific item in the buffer, and moves the last item
* to the vacated slot. This is useful for implementing a shuffling stream.
* Note that this operation necessarily scrambles the original order.
*
* @param relativeIndex: the index of the item to remove, relative to the
 * first item in the buffer (i.e., hiding the ring nature of the underlying
* storage).
*/
shuffleExcise(relativeIndex: number): T;
}
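
As a usage sketch of the public surface (import path illustrative), covering FIFO access, LIFO access, and `shuffleExcise`:

```js
// Path is illustrative: this class lives in the package internals.
const {RingBuffer} = require('@tensorflow/tfjs-data/dist/util/ring_buffer');

const buf = new RingBuffer(4);
buf.pushAll(['a', 'b', 'c', 'd']);
console.log(buf.isFull());          // true

console.log(buf.shift());           // 'a'  (FIFO, from the front)
console.log(buf.pop());             // 'd'  (LIFO, from the back)

// Remove the item at relative index 0 ('b'); the last item ('c') is
// moved into the vacated slot, scrambling the original order.
console.log(buf.shuffleExcise(0));  // 'b'
console.log(buf.shift());           // 'c'
console.log(buf.isEmpty());         // true
```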
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
Object.defineProperty(exports, "__esModule", { value: true });
var RingBuffer = (function () {
/**
* A ring buffer, providing O(1) FIFO, LIFO, and related operations.
*/
var RingBuffer = /** @class */ (function () {
/**
* Constructs a `RingBuffer`.
 * @param capacity The number of items that the buffer can accommodate.
*/
function RingBuffer(capacity) {
this.capacity = capacity;
this.begin = 0;
this.end = 0;
// Note we store the indices in the range 0 <= index < 2*capacity.
// This allows us to distinguish the full from the empty case.
// See https://www.snellman.net/blog/archive/2016-12-13-ring-buffers/
this.begin = 0; // inclusive
this.end = 0; // exclusive
if (capacity < 1) {

@@ -14,3 +41,7 @@ throw new RangeError('Can\'t create ring buffer of capacity < 1.');

}
/**
* Map any index into the range 0 <= index < 2*capacity.
*/
RingBuffer.prototype.wrap = function (index) {
// don't trust % on negative numbers
while (index < 0) {

@@ -33,2 +64,5 @@ index += this.doubledCapacity;

};
/**
* Returns the current number of items in the buffer.
*/
RingBuffer.prototype.length = function () {

@@ -41,8 +75,21 @@ var length = this.end - this.begin;

};
/**
* Reports whether the buffer is full.
* @returns true if the number of items in the buffer equals its capacity, and
* false otherwise.
*/
RingBuffer.prototype.isFull = function () {
return this.length() === this.capacity;
};
/**
* Reports whether the buffer is empty.
* @returns true if the number of items in the buffer equals zero, and
* false otherwise.
*/
RingBuffer.prototype.isEmpty = function () {
return this.length() === 0;
};
/**
* Adds an item to the end of the buffer.
*/
RingBuffer.prototype.push = function (value) {

@@ -55,2 +102,5 @@ if (this.isFull()) {

};
/**
* Adds many items to the end of the buffer, in order.
*/
RingBuffer.prototype.pushAll = function (values) {

@@ -62,2 +112,5 @@ for (var _i = 0, values_1 = values; _i < values_1.length; _i++) {

};
/**
* Removes and returns the last item in the buffer.
*/
RingBuffer.prototype.pop = function () {

@@ -72,2 +125,5 @@ if (this.isEmpty()) {

};
/**
* Adds an item to the beginning of the buffer.
*/
RingBuffer.prototype.unshift = function (value) {

@@ -80,2 +136,5 @@ if (this.isFull()) {

};
/**
* Removes and returns the first item in the buffer.
*/
RingBuffer.prototype.shift = function () {

@@ -90,2 +149,11 @@ if (this.isEmpty()) {

};
/**
* Removes and returns a specific item in the buffer, and moves the last item
* to the vacated slot. This is useful for implementing a shuffling stream.
* Note that this operation necessarily scrambles the original order.
*
* @param relativeIndex: the index of the item to remove, relative to the
 * first item in the buffer (i.e., hiding the ring nature of the underlying
* storage).
*/
RingBuffer.prototype.shuffleExcise = function (relativeIndex) {

@@ -92,0 +160,0 @@ if (this.isEmpty()) {

@@ -0,1 +1,18 @@

/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
export declare function isLocalPath(source: any): boolean;
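
Given the implementation shown below (a simple `'file://'` prefix test on strings), usage is straightforward; the import path is an assumption:

```js
// Path is illustrative: this helper lives in the package internals.
const {isLocalPath} =
    require('@tensorflow/tfjs-data/dist/util/source_util');

isLocalPath('file:///data/train.csv');         // true: 'file://' prefix
isLocalPath('https://example.com/train.csv');  // false: remote URL
isLocalPath(42);                               // false: not a string
```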
"use strict";
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
Object.defineProperty(exports, "__esModule", { value: true });
// Skip the tslint `any` type check because this method's purpose is to
// check the type of its input.
// tslint:disable-next-line:no-any
function isLocalPath(source) {

@@ -4,0 +24,0 @@ return (typeof source === 'string') && source.substr(0, 7) === 'file://';

@@ -1,2 +0,3 @@

declare const version = "1.0.0-alpha1";
/** @license See the LICENSE file. */
declare const version = "1.0.0-alpha2";
export { version };
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
var version = '1.0.0-alpha1';
/** @license See the LICENSE file. */
// This code is auto-generated, do not modify this file!
var version = '1.0.0-alpha2';
exports.version = version;
//# sourceMappingURL=version.js.map
{
"name": "@tensorflow/tfjs-data",
"version": "1.0.0-alpha1",
"version": "1.0.0-alpha2",
"description": "TensorFlow Data API in JavaScript",

@@ -14,3 +14,3 @@ "private": false,

"devDependencies": {
"@tensorflow/tfjs-core": "1.0.0-alpha1",
"@tensorflow/tfjs-core": "1.0.0-alpha2",
"@types/fetch-mock": "^6.0.1",

@@ -56,3 +56,3 @@ "@types/jasmine": "~2.5.53",

"peerDependencies": {
"@tensorflow/tfjs-core": "1.0.0-alpha1"
"@tensorflow/tfjs-core": "1.0.0-alpha2"
},

@@ -63,3 +63,6 @@ "dependencies": {

"seedrandom": "~2.4.3"
},
"browser": {
"fs": false
}
}

@@ -6,3 +6,3 @@ {

"sourceMap": true,
"removeComments": true,
"removeComments": false,
"preserveConstEnums": true,

@@ -9,0 +9,0 @@ "declaration": true,
