Comparing version 0.0.1 to 0.0.2
/* | ||
load dependencies | ||
*/ | ||
var fs = require("fs"), | ||
_ = require("underscore"), | ||
phpjs = require("phpjs"); | ||
* @package emlx2json | ||
* @copyright Copyright(c) 2013 Wouter Vroege. <wouter AT woutervroege DOT nl> | ||
* @author Wouter Vroege <wouter AT woutervroege DOT nl> | ||
* @license http://github.com/woutervroege/emlx2json/blob/master/LICENSE MIT License | ||
*/ | ||
"use strict"; | ||
exports.parseFile = function(filePath, callback) { | ||
fs.readFile(filePath, function(err, data) { | ||
if (err) | ||
return callback(err); | ||
var message = parse(data.toString()); | ||
callback(null, message); | ||
}) | ||
} | ||
var fs = require("fs"); | ||
var _ = require("underscore"); | ||
var phpjs = require("phpjs"); | ||
exports.parseFileSync = function(filePath) { | ||
var data = fs.readFileSync(filePath).toString(); | ||
return parse(data); | ||
} | ||
var emlx2json = module.exports = { | ||
exports.parseString = function(string) { | ||
return parse(string); | ||
} | ||
/**
 * Parse File (async)
 *
 * @param {String} filePath
 * @param {Function} callback called with (err, object)
 * @return object (delivered to the callback, not returned directly)
 */
/**
 * Parse raw message text into a message object.
 *
 * Each header field becomes a property keyed by the header name, and the
 * parsed body parts are stored under `parts`.
 *
 * @param {String} data raw message text
 * @return {Object} the parsed message
 */
function parse(data) {
    var Message = {};
    var contents = stripUnixNewLines(data);
    var header = parseHeader(contents);
    var body = parseBody(contents);
    var headerKeys = getHeaderKeys(header);
    var kv = parseKeyValuePairs(headerKeys, header);
    // Only the side effect is wanted here, so iterate with each, not map.
    _.each(kv, function (item) {
        Message[item.key] = item.value;
    });
    var contentTypeBoundaries = getContentTypeBoundaries(Message, body);
    Message['parts'] = parseBodyParts(body, contentTypeBoundaries);
    return Message;
}
parseFile: function (filePath, callback) { | ||
fs.readFile(filePath, function (err, data) { | ||
if (err) | ||
return callback(err); | ||
var message = parse(data.toString()); | ||
callback(null, message); | ||
}) | ||
}, | ||
/**
 * Return everything before the first blank line of the message.
 *
 * @param {String} contents message text using "\n" line endings
 * @return {String} the header section; the whole input when it contains no blank line
 */
function parseHeader(contents) {
    return contents.split(/\n\n/)[0];
}
/** | ||
* Parse File (sync) | ||
* | ||
* @param {String} filePath | ||
* @return object | ||
*/ | ||
/**
 * Return the part of the message that follows the header section, cut off
 * at the first "<?xml " marker and with trailing whitespace removed.
 *
 * The result keeps the blank line that separated header and body.
 *
 * @param {String} contents message text using "\n" line endings
 * @return {String} the body section
 */
function parseBody(contents) {
    var headerLength = contents.split(/\n\n/)[0].length;
    return contents
        .substring(headerLength)
        // Match the literal "<?xml " marker. The previous pattern made "<"
        // optional, so any "xml " in the text truncated the body.
        .split(/<\?xml /)[0]
        .replace(/\s+$/, "");
}
parseFileSync: function (filePath) { | ||
var data = fs.readFileSync(filePath).toString(); | ||
return parse(data); | ||
}, | ||
/**
 * Collect the distinct field names found at the start of header lines.
 *
 * A field name is a run of text beginning with a letter at the start of a
 * line and ending at the first colon on that line.
 *
 * @param {String} header header section of a message or body part
 * @return {Array} unique field names in order of first appearance; empty when none match
 */
function getHeaderKeys(header) {
    // match() yields null when nothing matches; treat that as "no keys".
    var matches = header.match(/(\n|^)[A-Z]+.*?\:/gi) || [];
    var keys = matches.map(function (item) {
        return item.replace(/(^\n|\:$)/g, "");
    });
    return keys.filter(function (key, index) {
        return keys.indexOf(key) === index;
    });
}
/**
 * Parse String (sync)
 *
 * @param {String} str
 * @return object
 */
function stripUnixNewLines(content) { | ||
return content.replace(/\r\n/g, "\n"); | ||
parseString: function (str) { | ||
return parse(string); | ||
} | ||
} | ||
/**
 * Split text at every "--Apple-Mail" marker (case-insensitive).
 *
 * @param {String} content body text
 * @return {Array} the pieces between markers; whatever follows a marker on
 *         its line stays at the start of the next piece
 */
function getBodyParts(content) {
    return content.split(/\-\-Apple-Mail/gi);
}
/**
 * Parse raw message text into a message object.
 *
 * Each header field becomes a property keyed by the header name, and the
 * parsed body parts are stored under `parts`.
 *
 * @param {String} data raw message text
 * @return {Object} the parsed message
 */
function parse(data) {
    var Message = {};
    var contents = stripUnixNewLines(data);
    var header = parseHeader(contents);
    var body = parseBody(contents);
    var headerKeys = getHeaderKeys(header);
    var kv = parseKeyValuePairs(headerKeys, header);
    // Only the side effect is wanted here, so iterate with each, not map.
    _.each(kv, function (item) {
        Message[item.key] = item.value;
    });
    var contentTypeBoundaries = getContentTypeBoundaries(Message, body);
    Message['parts'] = parseBodyParts(body, contentTypeBoundaries);
    return Message;
}
function parseKeyValuePairs(keys, header) { | ||
/**
 * Return everything before the first blank line of the message.
 *
 * @param {String} contents message text using "\n" line endings
 * @return {String} the header section; the whole input when it contains no blank line
 */
function parseHeader(contents) {
    return contents.split(/\n\n/)[0];
}
var currentIndex = 0; | ||
var maxIndex = header.length; | ||
/**
 * Return the part of the message that follows the header section, cut off
 * at the first "<?xml " marker and with trailing whitespace removed.
 *
 * The result keeps the blank line that separated header and body.
 *
 * @param {String} contents message text using "\n" line endings
 * @return {String} the body section
 */
function parseBody(contents) {
    var headerLength = contents.split(/\n\n/)[0].length;
    return contents
        .substring(headerLength)
        // Match the literal "<?xml " marker. The previous pattern made "<"
        // optional, so any "xml " in the text truncated the body.
        .split(/<\?xml /)[0]
        .replace(/\s+$/, "");
}
var content = header.replace(/\-/g, "\\-"); | ||
/**
 * Collect the distinct field names found at the start of header lines.
 *
 * A field name is a run of text beginning with a letter at the start of a
 * line and ending at the first colon on that line.
 *
 * @param {String} header header section of a message or body part
 * @return {Array} unique field names in order of first appearance; empty when none match
 */
function getHeaderKeys(header) {
    // match() yields null when nothing matches; treat that as "no keys".
    var matches = header.match(/(\n|^)[A-Z]+.*?\:/gi) || [];
    var keys = matches.map(function (item) {
        return item.replace(/(^\n|\:$)/g, "");
    });
    return keys.filter(function (key, index) {
        return keys.indexOf(key) === index;
    });
}
return _.map(keys, function(key, i) { | ||
var currentKey = key.replace(/\-/g, "\\-"); | ||
var nextKey = keys[i + 1] | ||
/**
 * Normalise line endings by replacing every "\r\n" with "\n".
 *
 * @param {String} content raw text
 * @return {String} text containing no "\r\n" sequences
 */
function stripUnixNewLines(content) {
    return content.replace(/\r\n/g, "\n");
}
var pattern = eval("/(^|\\n)" + currentKey + "/"); | ||
var pattern2 = (nextKey) ? eval("/(^|\\n)" + nextKey.replace(/\-/g, "\\-") + "/") : ""; | ||
var start = header.match(pattern).index; | ||
var end = (nextKey) ? header.match(pattern2).index : maxIndex; | ||
/**
 * Split text at a "boundary=..." run that extends to the end of the input.
 *
 * The pattern has no multiline flag, so "$" only matches at the very end of
 * the string; a "boundary=" on an earlier line does not split.
 *
 * @param {String} content body text
 * @return {Array} the pieces around the match, or the whole input in a one-element array
 */
function getBodyParts(content) {
    return content.split(/boundary=.*?$/gi);
}
var chunk = header.substring(start, end); | ||
var value = chunk | ||
.replace(eval("/((^|\\n)" + currentKey + "\: |\\n$)/g"), "") | ||
.replace(/(^\s|\s$|\;$)/g, ""); | ||
return { | ||
key: key, | ||
start: start, | ||
end: end, | ||
value: value | ||
} | ||
}); | ||
} | ||
function parseKeyValuePairs(keys, header) { | ||
/**
 * Gather every boundary string declared for the message.
 *
 * The boundary from the message's own Content-Type header (if any) comes
 * first, followed by each "boundary=" declaration found in the body that is
 * terminated by a newline. Quotes are removed from the body-declared values.
 *
 * @param {Object} message parsed header fields keyed by header name
 * @param {String} body body section of the message
 * @return {Array} boundary strings; empty when none are declared
 */
function getContentTypeBoundaries(message, body) {
    var values = [];
    var headerValue = getHeadercontentTypeBoundaries(message);
    if (headerValue) {
        values.push(headerValue);
    }
    // match() yields null when the body declares no boundary; _.map maps that to [].
    var bodyValues = body.match(/boundary\=.*?\n/g);
    var fromBody = _.map(bodyValues, function (item) {
        return item
            .replace(/.*?boundary\=/g, "")
            .replace(/^\n|\n$|\"/g, "");
    });
    return values.concat(fromBody);
}
var currentIndex = 0; | ||
var maxIndex = header.length; | ||
/**
 * Extract the boundary parameter from the message's Content-Type header.
 *
 * Looks the header up as "Content-Type" or "Content-type". A quoted
 * boundary is returned without its quotes; an unquoted one ends at the next
 * ";" or whitespace, so parameters after it are not swallowed.
 *
 * @param {Object} message parsed header fields keyed by header name
 * @return {String|Boolean} the boundary, or false when the header or the parameter is missing
 */
function getHeadercontentTypeBoundaries(message) {
    var contentType = message["Content-Type"] || message["Content-type"];
    if (!contentType) {
        return false;
    }
    var found = /boundary=(?:"([^"]*)"|([^;\s]*))/i.exec(contentType);
    if (!found) {
        return false;
    }
    // Group 1 is the quoted form, group 2 the bare token.
    return found[1] !== undefined ? found[1] : found[2];
}
var content = header.replace(/\-/g, "\\-"); | ||
function getBodyParts(body, contentTypeBoundaries) { | ||
if (contentTypeBoundaries.length === 0) | ||
return body.replace(/(\s+|\n+)$/g, ""); | ||
return _.map(keys, function (key, i) { | ||
var currentKey = key.replace(/\-/g, "\\-"); | ||
var nextKey = keys[i + 1] | ||
var bodyLines = body.split(/\n/g); | ||
var parts = [[]]; | ||
var currentPartIndex = 0; | ||
var pattern = eval("/(^|\\n)" + currentKey + "/"); | ||
var pattern2 = (nextKey) ? eval("/(^|\\n)" + nextKey.replace(/\-/g, "\\-") + "/") : ""; | ||
var start = header.match(pattern).index; | ||
var end = (nextKey) ? header.match(pattern2).index : maxIndex; | ||
for(var i in bodyLines) { | ||
for(var x = 0;x<contentTypeBoundaries.length;x++) { | ||
if(bodyLines[i].indexOf(contentTypeBoundaries[x]) !== -1) { | ||
currentPartIndex++; | ||
parts[currentPartIndex] = []; | ||
} else { | ||
var curIndex = parts[currentPartIndex].length -1; | ||
if(parts[currentPartIndex][curIndex] !== bodyLines[i]) { | ||
var l = bodyLines[i]; | ||
for(var z in contentTypeBoundaries) { | ||
l = l.replace(eval("/^(-+|)"+escapeSpecialChars(contentTypeBoundaries[z])+".*?(\\n|$)/g"), ""); | ||
var chunk = header.substring(start, end); | ||
var value = chunk | ||
.replace(eval("/((^|\\n)" + currentKey + "\: |\\n$)/g"), "") | ||
.replace(/(^\s|\s$|\;$)/g, ""); | ||
return { | ||
key: key, | ||
start: start, | ||
end: end, | ||
value: value | ||
} | ||
}); | ||
} | ||
/**
 * Gather every boundary string declared for the message.
 *
 * The boundary from the message's own Content-Type header (if any) comes
 * first, followed by each "boundary=" declaration found in the body that is
 * terminated by a newline. Quotes are removed from the body-declared values.
 *
 * @param {Object} message parsed header fields keyed by header name
 * @param {String} body body section of the message
 * @return {Array} boundary strings; empty when none are declared
 */
function getContentTypeBoundaries(message, body) {
    var values = [];
    var headerValue = getHeadercontentTypeBoundaries(message);
    if (headerValue) {
        values.push(headerValue);
    }
    // match() yields null when the body declares no boundary; _.map maps that to [].
    var bodyValues = body.match(/boundary\=.*?\n/g);
    var fromBody = _.map(bodyValues, function (item) {
        return item
            .replace(/.*?boundary\=/g, "")
            .replace(/^\n|\n$|\"/g, "");
    });
    return values.concat(fromBody);
}
/**
 * Extract the boundary parameter from the message's Content-Type header.
 *
 * Looks the header up as "Content-Type" or "Content-type". A quoted
 * boundary is returned without its quotes; an unquoted one ends at the next
 * ";" or whitespace, so parameters after it are not swallowed.
 *
 * @param {Object} message parsed header fields keyed by header name
 * @return {String|Boolean} the boundary, or false when the header or the parameter is missing
 */
function getHeadercontentTypeBoundaries(message) {
    var contentType = message["Content-Type"] || message["Content-type"];
    if (!contentType) {
        return false;
    }
    var found = /boundary=(?:"([^"]*)"|([^;\s]*))/i.exec(contentType);
    if (!found) {
        return false;
    }
    // Group 1 is the quoted form, group 2 the bare token.
    return found[1] !== undefined ? found[1] : found[2];
}
function getBodyParts(body, contentTypeBoundaries) { | ||
if (contentTypeBoundaries.length === 0) | ||
return body.replace(/(\s+|\n+)$/g, ""); | ||
var bodyLines = body.split(/\n/g); | ||
var parts = [ | ||
[] | ||
]; | ||
var currentPartIndex = 0; | ||
for (var i in bodyLines) { | ||
for (var x = 0; x < contentTypeBoundaries.length; x++) { | ||
if (bodyLines[i].indexOf(contentTypeBoundaries[x]) !== -1) { | ||
currentPartIndex++; | ||
parts[currentPartIndex] = []; | ||
} else { | ||
var curIndex = parts[currentPartIndex].length - 1; | ||
if (parts[currentPartIndex][curIndex] !== bodyLines[i]) { | ||
var l = bodyLines[i]; | ||
for (var z in contentTypeBoundaries) { | ||
l = l.replace(eval("/^(-+|)" + escapeSpecialChars(contentTypeBoundaries[z]) + ".*?(\\n|$)/g"), ""); | ||
} | ||
parts[currentPartIndex].push(l); | ||
} | ||
parts[currentPartIndex].push(l); | ||
} | ||
} | ||
} | ||
} | ||
var items = _.map(parts, function(item) { | ||
return item.join("\n"); | ||
}) | ||
var items = _.map(parts, function (item) { | ||
return item.join("\n"); | ||
}) | ||
return items; | ||
} | ||
return items; | ||
} | ||
/**
 * Split the body at the given boundaries and parse each piece.
 *
 * Pieces that yield neither headers nor body text are dropped.
 *
 * @param {String} body body section of the message
 * @param {Array} contentTypeBoundaries boundary strings declared for the message
 * @return {Array} parsed parts, each shaped { headers, body }
 */
function parseBodyParts(body, contentTypeBoundaries) {
    var items = [];
    if (contentTypeBoundaries.length === 0) {
        // With no boundaries there is nothing to split: parse the body as a
        // single part rather than iterating the string character by character.
        items.push(parseSingleBodyPart(body));
        return items;
    }
    var bodyParts = getBodyParts(body, contentTypeBoundaries);
    _.each(bodyParts, function (bodyPart) {
        var item = parseSingleBodyPart(bodyPart);
        if (!(Object.keys(item.headers).length === 0 && item.body.length === 0)) {
            items.push(item);
        }
    });
    return items;
}
/**
 * Split the body at the given boundaries and parse each piece.
 *
 * With no boundaries the whole body is parsed as one part. Otherwise pieces
 * that yield neither headers nor body text are dropped.
 *
 * @param {String} body body section of the message
 * @param {Array} contentTypeBoundaries boundary strings declared for the message
 * @return {Array} parsed parts, each shaped { headers, body }
 */
function parseBodyParts(body, contentTypeBoundaries) {
    var items = [];
    if (contentTypeBoundaries.length === 0) {
        items.push(parseSingleBodyPart(body));
        return items;
    }
    var bodyParts = getBodyParts(body, contentTypeBoundaries);
    // Only the side effect is wanted here, so iterate with each, not map.
    _.each(bodyParts, function (bodyPart) {
        var item = parseSingleBodyPart(bodyPart);
        if (!(Object.keys(item.headers).length === 0 && item.body.length === 0)) {
            items.push(item);
        }
    });
    return items;
}
/**
 * Parse one body part into its headers and its cleaned-up body text.
 *
 * The text before the first blank line is treated as the part's header
 * block; everything after it is the part's body.
 *
 * @param {String} bodyPart raw text of one part
 * @return {Object} { headers: Object, body: String } with body "" when empty
 */
function parseSingleBodyPart(bodyPart) {
    var headerAndBodyChunks = bodyPart.split(/\n\n/g);
    var rawHeader = headerAndBodyChunks[0];
    var headerKeys = getHeaderKeys(rawHeader);
    var headerKeyValues = parseKeyValuePairs(headerKeys, rawHeader);
    var headers = parseBodyPartHeadersFromKeyValuePairs(headerKeyValues);
    // Re-join the chunks after the header so blank lines inside the body survive.
    var body = stickBodyParts(headerAndBodyChunks);
    body = cleanupBody(body, headers);
    return {
        headers: headers,
        body: body || ""
    };
}
/**
 * Parse one body part into its headers and its cleaned-up body text.
 *
 * The text before the first blank line is treated as the part's header
 * block; everything after it is the part's body.
 *
 * @param {String} bodyPart raw text of one part
 * @return {Object} { headers: Object, body: String } with body "" when empty
 */
function parseSingleBodyPart(bodyPart) {
    var headerAndBodyChunks = bodyPart.split(/\n\n/g);
    var rawHeader = headerAndBodyChunks[0];
    var headerKeys = getHeaderKeys(rawHeader);
    var headerKeyValues = parseKeyValuePairs(headerKeys, rawHeader);
    var headers = parseBodyPartHeadersFromKeyValuePairs(headerKeyValues);
    // Re-join the chunks after the header so blank lines inside the body survive.
    var body = stickBodyParts(headerAndBodyChunks);
    body = cleanupBody(body, headers);
    return {
        headers: headers,
        body: body || ""
    };
}
/**
 * Join every chunk except the first back together with blank lines.
 *
 * @param {Array} bodyParts chunks of a part split on blank lines; element 0 is the header block
 * @return {String} the chunks after the first, joined by "\n\n"
 */
function stickBodyParts(bodyParts) {
    // slice, not splice: leave the caller's array intact.
    return bodyParts.slice(1).join("\n\n");
}
/**
 * Join every chunk except the first back together with blank lines.
 *
 * @param {Array} bodyParts chunks of a part split on blank lines; element 0 is the header block
 * @return {String} the chunks after the first, joined by "\n\n"
 */
function stickBodyParts(bodyParts) {
    // slice, not splice: leave the caller's array intact.
    return bodyParts.slice(1).join("\n\n");
}
/**
 * Turn a list of { key, value } records into a plain headers object.
 *
 * When a key occurs more than once, the last value wins.
 *
 * @param {Array} headerKeyValues records carrying `key` and `value` properties
 * @return {Object} headers keyed by header name
 */
function parseBodyPartHeadersFromKeyValuePairs(headerKeyValues) {
    var headers = {};
    // A missing list is treated as empty.
    (headerKeyValues || []).forEach(function (item) {
        headers[item.key] = item.value;
    });
    return headers;
}
/**
 * Turn a list of { key, value } records into a plain headers object.
 *
 * When a key occurs more than once, the last value wins.
 *
 * @param {Array} headerKeyValues records carrying `key` and `value` properties
 * @return {Object} headers keyed by header name
 */
function parseBodyPartHeadersFromKeyValuePairs(headerKeyValues) {
    var headers = {};
    // A missing list is treated as empty.
    (headerKeyValues || []).forEach(function (item) {
        headers[item.key] = item.value;
    });
    return headers;
}
function cleanupBody(body, headers) { | ||
if (headers) { | ||
if (headers['Content-Transfer-Encoding'] && headers['Content-Transfer-Encoding'].toLowerCase() == 'quoted-printable') { | ||
body = removeQuotedPrintables(body); | ||
function cleanupBody(body, headers) { | ||
if (headers) { | ||
if (headers['Content-Transfer-Encoding'] && headers['Content-Transfer-Encoding'].toLowerCase() == 'quoted-printable') { | ||
body = removeQuotedPrintables(body); | ||
} | ||
if (headers['Content-Type'] && headers['Content-Type'].match(/^text\/html/)) { | ||
body = removePartsOutsideHtml(stripUselessWhiteSpace(stripNewLines(body))); | ||
} | ||
} | ||
if (headers['Content-Type'] && headers['Content-Type'].match(/^text\/html/)) { | ||
body = removePartsOutsideHtml(stripUselessWhiteSpace(stripNewLines(body))); | ||
} | ||
return body; | ||
} | ||
return body; | ||
} | ||
/**
 * Decode quoted-printable text by delegating to phpjs.quoted_printable_decode.
 *
 * @param {String} html quoted-printable encoded text
 * @return {String} the decoded text
 */
function removeQuotedPrintables(html) {
    return phpjs.quoted_printable_decode(html);
}
/**
 * Decode quoted-printable text by delegating to phpjs.quoted_printable_decode.
 *
 * @param {String} html quoted-printable encoded text
 * @return {String} the decoded text
 */
function removeQuotedPrintables(html) {
    return phpjs.quoted_printable_decode(html);
}
/**
 * Remove every newline, together with an "=" that directly precedes a
 * newline or sits at the very end of the text.
 *
 * @param {String} text text to flatten
 * @return {String} the text on a single line
 */
function stripNewLines(text) {
    return text.replace(/(\=\n|\n|\=$)/g, "");
}
/**
 * Remove every newline, together with an "=" that directly precedes a
 * newline or sits at the very end of the text.
 *
 * @param {String} text text to flatten
 * @return {String} the text on a single line
 */
function stripNewLines(text) {
    return text.replace(/(\=\n|\n|\=$)/g, "");
}
/**
 * Collapse every run of whitespace into a single space.
 *
 * @param {String} text text to normalise
 * @return {String} the text with whitespace runs collapsed (not trimmed)
 */
function stripUselessWhiteSpace(text) {
    return text.replace(/\s+/g, " ");
}
/**
 * Collapse every run of whitespace into a single space.
 *
 * @param {String} text text to normalise
 * @return {String} the text with whitespace runs collapsed (not trimmed)
 */
function stripUselessWhiteSpace(text) {
    return text.replace(/\s+/g, " ");
}
/**
 * Drop everything that follows the first "</html>" closing tag.
 *
 * @param {String} text HTML text, possibly followed by trailing content
 * @return {String} the text ending at "</html>"; unchanged when the tag is absent
 */
function removePartsOutsideHtml(text) {
    // [\s\S] also spans newlines, so multi-line trailers are removed too.
    return text.replace(/<\/html>[\s\S]*$/, "</html>");
}
/**
 * Drop everything that follows the first "</html>" closing tag.
 *
 * @param {String} text HTML text, possibly followed by trailing content
 * @return {String} the text ending at "</html>"; unchanged when the tag is absent
 */
function removePartsOutsideHtml(text) {
    // [\s\S] also spans newlines, so multi-line trailers are removed too.
    return text.replace(/<\/html>[\s\S]*$/, "</html>");
}
/**
 * Backslash-escape the characters that have a special meaning in a regular
 * expression, so the string can be embedded in a pattern literally.
 *
 * Escaped characters: - [ ] / { } ( ) * + ? . \ ^ $ |
 *
 * @param {String} str text to escape
 * @return {String} the text with each special character preceded by "\"
 */
function escapeSpecialChars(str) {
    // Declared locally: assigning undeclared names throws under "use strict".
    var replacePattern = /[-\[\]\/{}()*+?.\\^$|]/g;
    return str.replace(replacePattern, "\\$&");
}
/**
 * Backslash-escape the characters that have a special meaning in a regular
 * expression, so the string can be embedded in a pattern literally.
 *
 * Escaped characters: - [ ] / { } ( ) * + ? . \ ^ $ |
 *
 * @param {String} str text to escape
 * @return {String} the text with each special character preceded by "\"
 */
function escapeSpecialChars(str) {
    // A literal character class replaces the array-join construction that
    // rebuilt the same pattern on every call.
    var replacePattern = /[-\[\]\/{}()*+?.\\^$|]/g;
    return str.replace(replacePattern, "\\$&");
}
{ | ||
"name": "emlx2json", | ||
"version": "0.0.1", | ||
"description": "JSON parser for Apple Mail.app messages (.emlx)", | ||
"version": "0.0.2", | ||
"description": "JSON parser for Apple Mail (Mail.app) messages (.emlx)", | ||
"dependencies": { | ||
@@ -18,3 +18,3 @@ "underscore": "1.5.x", | ||
"main": "lib/emlx2json.js", | ||
"_id": "emlx2json@0.0.1" | ||
"_id": "emlx2json@0.0.2" | ||
} |
10035
5
222