tiny-osmpbf
Advanced tools
Comparing version 0.1.0 to 1.0.0-beta.1
360
index.js
@@ -0,49 +1,18 @@ | ||
// "zlib" decompression | ||
var inflate = require('tiny-inflate') | ||
// protocol buffers library | ||
var Pbf = require('pbf') | ||
// readers for the two pbf formats used in an osmpbf file | ||
var FileFormat = require('./proto/fileformat.js') | ||
var OsmFormat = require('./proto/osmformat.js') | ||
// Lookup table: numeric relation member type (enum value) -> OSM element type name.
// Frozen because it is a shared constant that is only ever read.
var memberTypes = Object.freeze({
  0: 'node',
  1: 'way',
  2: 'relation'
})
// osmpbf "features" this parser supports (feature name -> true).
// Frozen because it is a shared constant that is only ever read.
var supportedFeatures = Object.freeze({
  "OsmSchema-V0.6": true,
  "DenseNodes": true,
  "HistoricalInformation": true
})
/**
 * Extracts the payload of an osmpbf data blob, decompressing it when needed.
 *
 * Fields read from `blob`: `raw`, `raw_size`, `zlib_data`, `lzma_data` and
 * `OBSOLETE_bzip2_data`. A field counts as "unset" when it is null or
 * undefined.
 *
 * @param {Object} blob - parsed Blob message
 * @returns {*} `blob.raw` as-is for uncompressed blobs, otherwise a Buffer of
 *   `blob.raw_size` bytes filled by `inflate`
 * @throws {Error} if the blob holds lzma or bzip2 data, or no data at all
 */
function extractBlobData(blob) {
  // todo: add tests for non-zlib cases
  // error cases:
  // * lzma compressed data (support for this kind of data is not required by the specs)
  if (blob.lzma_data != null) {
    throw new Error("unsupported osmpbf blob data type: lzma_data")
  }
  // * formerly used for bzip2 compressed data, deprecated since 2010
  if (blob.OBSOLETE_bzip2_data != null) {
    throw new Error("unsupported osmpbf blob data type: OBSOLETE_bzip2_data")
  }
  // supported data formats:
  // * uncompressed data
  if (blob.raw != null) {
    return blob.raw
  }
  // * zlib "deflate" compressed data
  if (blob.zlib_data != null) {
    // Buffer.alloc replaces the deprecated `new Buffer(size)` and always
    // returns zero-filled memory
    var blobData = Buffer.alloc(blob.raw_size)
    // presumably skips the 2-byte zlib stream header so that inflate is fed
    // the bare deflate stream -- TODO confirm against the inflate library
    inflate(blob.zlib_data.slice(2), blobData)
    return blobData
  }
  // * empty data blob??
  throw new Error("unsupported osmpbf blob data type: <empty blob>")
}
/* main function of the library | ||
* input: osmpbf data as a javascript arraybuffer | ||
* handler: (optional) callback that is called for each osm element | ||
* return value: an OSM-JSON object with file metadata and (if no custom handler | ||
* is specified) all parsed osm elements in an array | ||
*/ | ||
module.exports = function(input, handler) { | ||
// default element handler: save them in an array to be returned at the end | ||
var elements = undefined | ||
@@ -59,22 +28,105 @@ if (handler === undefined) { | ||
/* An osm pbf file contains a repeating sequence of fileblocks: | ||
* blobHeaderLength: length of the following blobHeader message (32 bit | ||
* integer, big endian/network byte order) | ||
* blobHeader: pbf-serialized BlobHeader message, containing the size of | ||
* the following blobData message | ||
* blob: pbf-serialized Blob message (contains compressed osm data) | ||
*/ | ||
pbf = new Pbf(input) | ||
pbf.length = 0 | ||
// helper function to wind pbf reader forward | ||
pbf.forward = function(nextLength, relative) { | ||
this.pos = this.length | ||
this.length += nextLength | ||
} | ||
pbf.forward(4) | ||
blobHeaderLength = new DataView(new Uint8Array(input).buffer).getInt32(pbf.pos, false) | ||
pbf.pos += 4 | ||
pbf.length = pbf.pos + blobHeaderLength | ||
// we now know the length of the first blobHeader: wind the pbf buffer forward and parse the data | ||
pbf.forward(blobHeaderLength) | ||
blobHeader = FileFormat.BlobHeader.read(pbf) | ||
//console.error(blobHeader) | ||
/* A BlobHeader contains information about the following data blob. | ||
* | ||
* Definition: | ||
* message BlobHeader { | ||
* required string type = 1; | ||
* optional bytes indexdata = 2; | ||
* required int32 datasize = 3; | ||
* } | ||
* | ||
* Example: | ||
* { type: 'OSMHeader', indexdata: null, datasize: 72 } | ||
*/ | ||
pbf.pos = pbf.length | ||
pbf.length = pbf.pos + blobHeader.datasize | ||
// The first blob must be of type 'OSMHeader'. Report the type that was
// actually found, which is stored on the parsed blobHeader.
if (blobHeader.type !== 'OSMHeader') {
  throw new Error("unsupported: expected first osmpbf blob to be of type 'OSMHeader', but found '"+blobHeader.type+"'")
}
// the blob header knows the size of the following data blob: | ||
pbf.forward(blobHeader.datasize) | ||
blob = FileFormat.Blob.read(pbf) | ||
/* A blob is used to store an (either uncompressed or zlib/deflate compressed) | ||
* blob of osm data. | ||
* | ||
* Definition: | ||
* message Blob { | ||
* optional bytes raw = 1; // No compression | ||
* optional int32 raw_size = 2; // When compressed, the uncompressed size | ||
* // Possible compressed versions of the data. | ||
* optional bytes zlib_data = 3; | ||
* // PROPOSED feature for LZMA compressed data. SUPPORT IS NOT REQUIRED. | ||
* optional bytes lzma_data = 4; | ||
* // Formerly used for bzip2 compressed data. Depreciated (sic) in 2010. | ||
* optional bytes OBSOLETE_bzip2_data = 5 [deprecated=true]; // Don't reuse this tag number. | ||
* } | ||
*/ | ||
blobData = extractBlobData(blob) | ||
// at this point, blobData is still a protocol buffer (pbf) message | ||
var osmHeader = new Pbf(blobData) | ||
osmHeader = OsmFormat.HeaderBlock.read(osmHeader) | ||
//console.error(osmHeader) | ||
/* The first blob of an osm pbf file must contain an osmHeader message. It | ||
* contains several metadata fields about the osmpbf file (timestamps, source | ||
* string, etc.). It also indicates which features a parser must support in | ||
* order to correctly parse the file. | ||
* | ||
* Definition: | ||
* message HeaderBlock { | ||
* optional HeaderBBox bbox = 1; | ||
* // Additional tags to aid in parsing this dataset | ||
* repeated string required_features = 4; | ||
* repeated string optional_features = 5; | ||
* optional string writingprogram = 16; | ||
* optional string source = 17; // From the bbox field. | ||
* // Tags that allow continuing an Osmosis replication: | ||
* // replication timestamp, expressed in seconds since the epoch, | ||
* // otherwise the same value as in the "timestamp=..." field | ||
* // in the state.txt file used by Osmosis | ||
* optional int64 osmosis_replication_timestamp = 32; | ||
* // replication sequence number (sequenceNumber in state.txt) | ||
* optional int64 osmosis_replication_sequence_number = 33; | ||
* // replication base URL (from Osmosis' configuration.txt file) | ||
* optional string osmosis_replication_base_url = 34; | ||
* } | ||
* | ||
* Example: | ||
* { bbox: null, | ||
* required_features: [ 'OsmSchema-V0.6', 'DenseNodes' ], | ||
* optional_features: [], | ||
* writingprogram: 'Overpass API prototype', | ||
* source: '', | ||
* osmosis_replication_timestamp: 1462060800, | ||
* osmosis_replication_sequence_number: 0, | ||
* osmosis_replication_base_url: '' } | ||
*/ | ||
@@ -92,10 +144,16 @@ // check for required_features | ||
pbf.pos = pbf.length | ||
pbf.forward(4) | ||
blobHeaderLength = new DataView(new Uint8Array(input).buffer).getInt32(pbf.pos, false) | ||
pbf.pos += 4 | ||
pbf.length = pbf.pos + blobHeaderLength | ||
pbf.forward(blobHeaderLength) | ||
blobHeader = FileFormat.BlobHeader.read(pbf) | ||
pbf.pos = pbf.length | ||
pbf.length = pbf.pos + blobHeader.datasize | ||
if (blobHeader.type !== 'OSMData') { | ||
// ignore any unknown data blobs (which may or may not be introduced in future versions of the osmpbf format) | ||
pbf.forward(blobHeader.datasize) | ||
continue | ||
} | ||
// the blobHeader contains the size of the following data blob | ||
pbf.forward(blobHeader.datasize) | ||
blob = FileFormat.Blob.read(pbf) | ||
@@ -108,4 +166,36 @@ | ||
//console.error(osmData) | ||
//console.error(osmData.primitivegroup[0].dense) | ||
/* The actual OSM data is stored in a list of PrimitiveBlock messages. Each | ||
* one contains some metadata about the data in this block (e.g. lat/lon | ||
* offsets), a stringtable for tag keys/values (and user names) and a list | ||
* of "PrimitiveGroup"s, each containing a list of OSM elements of the same | ||
* type (i.e. nodes, ways or relations). | ||
* | ||
* Definition: | ||
* message PrimitiveBlock { | ||
* required StringTable stringtable = 1; | ||
* repeated PrimitiveGroup primitivegroup = 2; | ||
* // Granularity, units of nanodegrees, used to store coordinates in this block | ||
* optional int32 granularity = 17 [default=100]; | ||
* // Offset value between the output coordinates coordinates and the granularity grid, in units of nanodegrees. | ||
* optional int64 lat_offset = 19 [default=0]; | ||
* optional int64 lon_offset = 20 [default=0]; | ||
* // Granularity of dates, normally represented in units of milliseconds since the 1970 epoch. | ||
* optional int32 date_granularity = 18 [default=1000]; | ||
* // Proposed extension: | ||
* //optional BBox bbox = XX; | ||
* } | ||
* | ||
* Example: | ||
* { stringtable: { s: [ [Object], [Object], [Object], [Object] ] }, | ||
* primitivegroup: | ||
* [ { nodes: [], | ||
* dense: [Object], | ||
* ways: [], | ||
* relations: [], | ||
* changesets: [] } ], | ||
* granularity: 100, | ||
* lat_offset: 0, | ||
* lon_offset: 0, | ||
* date_granularity: 1000 } | ||
*/ | ||
@@ -120,2 +210,3 @@ // unpack stringtable into js object | ||
// coordinate granularity: set default, invert and pre-scale to nano-degrees | ||
// (inversion helps to eliminate double precision rounding errors later on) | ||
if (!osmData.granularity || osmData.granularity === 100) | ||
@@ -131,3 +222,13 @@ osmData.granularity = 1E7 | ||
osmData.primitivegroup.forEach(function(p) { | ||
// each "primitivegroup" can either be a list of changesets, | ||
/* Each "primitivegroup" can either be a list of changesets, relations, ways, nodes or "dense" nodes | ||
* | ||
* Definition: | ||
* message PrimitiveGroup { | ||
* repeated Node nodes = 1; | ||
* optional DenseNodes dense = 2; | ||
* repeated Way ways = 3; | ||
* repeated Relation relations = 4; | ||
* repeated ChangeSet changesets = 5; | ||
* } | ||
*/ | ||
switch(true) { | ||
@@ -145,3 +246,39 @@ // error cases: | ||
// * list of osm relations | ||
/* A list of osm relations. | ||
* | ||
* Definition: | ||
* message Relation { | ||
* enum MemberType { | ||
* NODE = 0; | ||
* WAY = 1; | ||
* RELATION = 2; | ||
* } | ||
* required int64 id = 1; | ||
* // Parallel arrays. | ||
* repeated uint32 keys = 2 [packed = true]; | ||
* repeated uint32 vals = 3 [packed = true]; | ||
* optional Info info = 4; | ||
* // Parallel arrays | ||
* repeated int32 roles_sid = 8 [packed = true]; | ||
* repeated sint64 memids = 9 [packed = true]; // DELTA encoded | ||
* repeated MemberType types = 10 [packed = true]; | ||
* } | ||
* | ||
* message Info { | ||
* optional int32 version = 1 [default = -1]; | ||
* optional int32 timestamp = 2; | ||
* optional int64 changeset = 3; | ||
* optional int32 uid = 4; | ||
* optional int32 user_sid = 5; // String IDs | ||
* // The visible flag is used to store history information. It indicates that | ||
* // the current object version has been created by a delete operation on the | ||
* // OSM API. | ||
* // When a writer sets this flag, it MUST add a required_features tag with | ||
* // value "HistoricalInformation" to the HeaderBlock. | ||
* // If this flag is not available for some object it MUST be assumed to be | ||
* // true if the file has the required_features tag "HistoricalInformation" | ||
* // set. | ||
* optional bool visible = 6; | ||
* } | ||
*/ | ||
case p.relations.length > 0: | ||
@@ -177,3 +314,14 @@ for (var i=0; i<p.relations.length; i++) { | ||
// * list of osm ways | ||
/* A list of osm ways | ||
* | ||
* Definition: | ||
* message Way { | ||
* required int64 id = 1; | ||
* // Parallel arrays. | ||
* repeated uint32 keys = 2 [packed = true]; | ||
* repeated uint32 vals = 3 [packed = true]; | ||
* optional Info info = 4; | ||
* repeated sint64 refs = 8 [packed = true]; // DELTA coded | ||
* } | ||
*/ | ||
case p.ways.length > 0: | ||
@@ -205,3 +353,15 @@ for (var i=0; i<p.ways.length; i++) { | ||
// * list of osm nodes | ||
/* A basic list of osm nodes (dense nodes are more common, see below) | ||
* | ||
* Definition: | ||
* message Node { | ||
* required sint64 id = 1; | ||
* // Parallel arrays. | ||
* repeated uint32 keys = 2 [packed = true]; // String IDs. | ||
* repeated uint32 vals = 3 [packed = true]; // String IDs. | ||
* optional Info info = 4; | ||
* required sint64 lat = 8; | ||
* required sint64 lon = 9; | ||
* } | ||
*/ | ||
case p.nodes.length > 0: | ||
@@ -231,3 +391,33 @@ for (var i=0; i<p.nodes.length; i++) { | ||
// * dense list of osm nodes | ||
/* A "dense" list of osm nodes that uses a better packed & delta-encoded | ||
* format: | ||
* | ||
* Definition: | ||
* message DenseNodes { | ||
* repeated sint64 id = 1 [packed = true]; // DELTA coded | ||
* //repeated Info info = 4; | ||
* optional DenseInfo denseinfo = 5; | ||
* repeated sint64 lat = 8 [packed = true]; // DELTA coded | ||
* repeated sint64 lon = 9 [packed = true]; // DELTA coded | ||
* // Special packing of keys and vals into one array. May be empty if all nodes in this block are tagless. | ||
* repeated int32 keys_vals = 10 [packed = true]; | ||
* } | ||
* | ||
* message DenseInfo { | ||
* repeated int32 version = 1 [packed = true]; | ||
* repeated sint64 timestamp = 2 [packed = true]; // DELTA coded | ||
* repeated sint64 changeset = 3 [packed = true]; // DELTA coded | ||
* repeated sint32 uid = 4 [packed = true]; // DELTA coded | ||
* repeated sint32 user_sid = 5 [packed = true]; // String IDs for usernames. DELTA coded | ||
* // The visible flag is used to store history information. It indicates that | ||
* // the current object version has been created by a delete operation on the | ||
* // OSM API. | ||
* // When a writer sets this flag, it MUST add a required_features tag with | ||
* // value "HistoricalInformation" to the HeaderBlock. | ||
* // If this flag is not available for some object it MUST be assumed to be | ||
* // true if the file has the required_features tag "HistoricalInformation" | ||
* // set. | ||
* repeated bool visible = 6 [packed = true]; | ||
* } | ||
*/ | ||
case p.dense !== null: | ||
@@ -257,2 +447,9 @@ var id=0,lat=0,lon=0,timestamp=0,changeset=0,uid=0,user=0 | ||
var tags = {} | ||
/* tag keys and values are encoded as a single array of stringid's: | ||
* the pattern is: ((<keyid> <valid>)* '0' )* | ||
* (each node's tags are encoded as alternating <keyid> <valid>, a | ||
* single stringid of 0 delimits tags of one node from tags of the | ||
* next node.) | ||
* if no node in the primitivegroup has a tag, it can be left empty. | ||
*/ | ||
if (p.dense.keys_vals.length > 0) { | ||
@@ -313,1 +510,46 @@ while (p.dense.keys_vals[j] != 0) { | ||
} | ||
/* some helpers */ | ||
// supported osmpbf "features" | ||
// osmpbf "features" this parser supports (feature name -> true).
// Frozen because it is a shared constant that is only ever read.
var supportedFeatures = Object.freeze({
  "OsmSchema-V0.6": true,
  "DenseNodes": true,
  "HistoricalInformation": true
})
// convert enum values to actual relation member types
// (frozen for the same reason as above)
var memberTypes = Object.freeze({
  0: 'node',
  1: 'way',
  2: 'relation'
})
/**
 * Helper function that extracts / decompresses a data blob.
 *
 * Fields read from `blob`: `raw`, `raw_size`, `zlib_data`, `lzma_data` and
 * `OBSOLETE_bzip2_data`. A field counts as "unset" when it is null or
 * undefined.
 *
 * @param {Object} blob - parsed Blob message
 * @returns {*} `blob.raw` as-is for uncompressed blobs, otherwise a Buffer of
 *   `blob.raw_size` bytes filled by `inflate`
 * @throws {Error} if the blob holds lzma or bzip2 data, or no data at all
 */
function extractBlobData(blob) {
  // todo: add tests for non-zlib cases
  // error cases:
  // * lzma compressed data (support for this kind of data is not required by the specs)
  if (blob.lzma_data != null) {
    throw new Error("unsupported osmpbf blob data type: lzma_data")
  }
  // * formerly used for bzip2 compressed data, deprecated since 2010
  if (blob.OBSOLETE_bzip2_data != null) {
    throw new Error("unsupported osmpbf blob data type: OBSOLETE_bzip2_data")
  }
  // supported data formats:
  // * uncompressed data
  if (blob.raw != null) {
    return blob.raw
  }
  // * zlib "deflate" compressed data
  if (blob.zlib_data != null) {
    // Buffer.alloc replaces the deprecated `new Buffer(size)` and always
    // returns zero-filled memory
    var blobData = Buffer.alloc(blob.raw_size)
    // presumably skips the 2-byte zlib stream header so that inflate is fed
    // the bare deflate stream -- TODO confirm against the inflate library
    inflate(blob.zlib_data.slice(2), blobData)
    return blobData
  }
  // * empty data blob??
  throw new Error("unsupported osmpbf blob data type: <empty blob>")
}
{ | ||
"name": "tiny-osmpbf", | ||
"version": "0.1.0", | ||
"version": "1.0.0-beta.1", | ||
"description": "smallest osm.pbf parser", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
231017
2499