LEO Quick Start Guide: https://github.com/LeoPlatform/Leo
Index
Working with aggregate data
NPM Requirements
- leo-connector-entity-table: 2.1.0+
Step 1: Loading data into the entities table
Create a new bot to load entities from a queue:
const config = require('leo-config');
const entityTable = require('leo-connector-entity-table');
const queueName = "example_queue_1";
entityTable.loadFromQueue(config.entityTableName, queueName, payloadTransform, {
botId: context.botId,
batchRecords: 25,
merge: false
}).then(() => {
console.log(`Completed. Remaining Time:`, context.getRemainingTimeInMillis());
callback();
}).catch(callback);
const payloadTransform = (payload, hash) => {
let hashed = hash(queueName, payload.id);
let id = payload.id.toString();
return Object.assign({}, payload, {
id: id,
partition: hashed
});
};
In the package.json, be sure to add the entity table name and set the trigger to be the queue you’re reading from.
Example package.json:
{
"name": "1-SampleEntityLoader",
"version": "1.0.0",
"description": "Takes data from a queue and loads into the entity table (DynamoDB)",
"main": "index.js",
"directories": {
"test": "test"
},
"scripts": {
"test": "leo-cli test . "
},
"config": {
"leo": {
"type": "cron",
"memory": 128,
"timeout": 300,
"role": "LeoEntitiesChangesRole",
"env": {
"entityTableName": {
"Fn::Sub": "${Entities}"
}
},
"cron": {
"settings": {},
"triggers": [
"example_queue_1"
]
}
}
}
}
Step 2: Write entity changes to a queue
If you already have an entity table processor bot set up, you can skip this step.
For this step, you'll need to have 1 entity change processor that contains a single line:
// Entity change processor: the bot's handler is the `tableProcessor` export of leo-connector-entity-table.
exports.handler = require("leo-connector-entity-table").tableProcessor;
And make sure the bot has the proper role:
{
"name": "2-EntityChangeProcessor",
"version": "1.0.0",
"description": "Reads from DynamoDB entity table and writes to a queue with entity changes",
"main": "index.js",
"directories": {
"test": "test"
},
"scripts": {
"test": "leo-cli test . "
},
"config": {
"leo": {
"type": "bot",
"memory": 256,
"timeout": 30,
"role": "LeoEntitiesChangesRole",
"env": {},
"cron": {
"settings": {}
}
}
}
}
Step 3: Aggregate data and load it into the aggregations table
const leo = require("leo-sdk");
const config = require("leo-config");
const ls = leo.streams;
const agg = require("leo-connector-entity-table/lib/aggregations.js");
const entity = 'example';
exports.handler = require("leo-sdk/wrappers/cron.js")((event, context, callback) => {
let source = Object.assign({
source: "example_queue_1_changes"
}, event).source;
let stats = ls.stats(context.botId, source);
ls.pipe(
leo.read(context.botId, source)
, stats
, agg.aggregator(config.aggregationTableName, entity, payload => [transformChanges(payload)])
, err => {
if (err) {
callback(err);
} else {
let statsData = stats.get();
stats.checkpoint((err) => {
if (err) {
return callback(err);
}
if (statsData.units > 0) {
leo.bot.checkpoint(context.botId, `system:dynamodb.${config.aggregationTableName.replace(/-[A-Z0-9]{12}$/, "")}.${entity}`, {
type: "write",
eid: statsData.eid,
records: statsData.units,
started_timestamp: statsData.started_timestamp,
ended_timestamp: statsData.ended_timestamp,
source_timestamp: statsData.source_timestamp
}, () => {
callback();
});
} else {
callback();
}
});
}
}
);
});
/**
 * Builds the aggregation description for one changed entity.
 *
 * @param {Object} payload - Entity change payload; `id` and `updated` are read from it.
 * @returns {Object} An object with the entity name, the entity id, the aggregate settings
 *     (timestamp and buckets) and the aggregated `data` fields.
 */
const transformChanges = (payload) => {
    const { id, updated } = payload;

    const aggregate = {
        timestamp: updated,
        buckets: ["alltime", "hourly", "minutely"]
    };

    const data = {
        id: agg.first(Date.now(), id),
        payload: agg.last(Date.now(), payload)
    };

    return { entity, id, aggregate, data };
};
package.json
{
"name": "SampleEntityChanges",
"version": "1.0.0",
"description": "",
"main": "index.js",
"directories": {
"test": "test"
},
"scripts": {
"test": "leo-cli test ."
},
"config": {
"leo": {
"type": "cron",
"memory": 128,
"timeout": 300,
"role": "LeoEntitiesChangesRole",
"env": {
"aggregationTableName": {
"Fn::Sub": "${Aggregations}"
}
},
"cron": {
"settings": {},
"triggers": [
"example_queue_1_changes"
]
}
}
}
}
Step 4: Process old/new aggregate data
Create a bot to process old/new data. It's just a single command with configuration parameters:
// Old/new processor bot: the handler is whatever `tableOldNewProcessor` returns for this configuration.
// NOTE(review): the meaning of each option below is not shown in this guide - confirm against the
// leo-connector-entity-table documentation before changing them.
exports.handler = require("leo-connector-entity-table").tableOldNewProcessor({
defaultQueue: "sample_aggregation_changes",
resourcePrefix: "sample",
eventSuffix: "_aggregations",
botSuffix: "_aggregation_changes"
});
package.json
{
"name": "SampleEntityAggregations",
"version": "1.0.0",
"description": "Reads from DynamoDB aggregations table and writes to a queue with aggregation changes",
"main": "index.js",
"directories": {
"test": "test"
},
"scripts": {
"test": "leo-cli test . "
},
"config": {
"leo": {
"type": "bot",
"memory": 256,
"timeout": 30,
"role": "LeoEntitiesChangesRole",
"env": {},
"cron": {
"settings": {}
}
}
}
}
Available Aggregate functions
sum(field)
Gets the sum of all values of this field for the selected buckets.
Example
data: {
totalAmountSpent: sum(payload.paymentAmount)
}
min(field)
Gets the min value for this field for the selected buckets.
Example
data: {
minPayment: min(payload.paymentAmount)
}
max(field)
Gets the max value for this field for the selected buckets.
Example
data: {
maxPayment: max(payload.paymentAmount)
}
countChanges(field)
Counts the number of times this field has changed for the selected buckets.
Example
Counting the number of times the temperature has changed in the given time period (see selected buckets).
data: {
totalChanges: countChanges(payload.currentTemperature)
}
last(date, field)
Gets the last value of the selected field prior to the selected date.
Example
data: {
lastOrderId: last(payload.date_created, payload.id)
}
first(date, field)
Gets the first value of the selected field for the selected bucket (see bucket options).
Example
data: {
firstOrderId: first(payload.date_created, payload.id)
}
hash(key, function)
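Creates a hash value. In Step 1 it is received as the second argument of the payload transform function passed to loadFromQueue, where it is called with the queue name and the entity id to compute the record's partition.
Example
let hashed = hash(queueName, payload.id);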
aggregator(tableName, namespace, transformFunction)
Create an aggregation from a selected namespace and transformed data using the available functions.
Example
aggregator(config.aggregationTableName, entity, payload => [transformChanges(payload)])
/**
 * Example transform for the aggregator: describes how one payload is aggregated.
 *
 * @param {Object} payload - Payload being aggregated; `id`, `updated`, `date_created`
 *     and `payments` are read from it.
 * @returns {Object} An object with the entity name, the entity id, the aggregate settings
 *     (timestamp and buckets) and the aggregated `data` fields.
 */
const transformChanges = (payload) => {
    const aggregate = {
        timestamp: payload.updated,
        buckets: ["alltime", "hourly", "minutely"]
    };

    const data = {
        firstOrderId: first(payload.date_created, payload.id),
        lastOrderId: last(payload.date_created, payload.id),
        totalAmountSpent: sum(payload.payments)
    };

    return {
        entity,
        id: payload.id,
        aggregate,
        data
    };
};
Available querying functions
getCurrent
getCurrentMeta
query
Support
Want to hire an expert, or need technical support? Reach out to the Leo team: https://leoinsights.com/contact