email-reply-parser
Advanced tools
Comparing version 1.2.6 to 1.3.0
var FragmentDTO = require("./fragmentdto"); | ||
var Fragment = require("../fragment"); | ||
var Email = require("../email"); | ||
var RegexList = require("../regex"); | ||
@@ -9,50 +10,2 @@ const QUOTE_REGEX = /(>+)$/; | ||
constructor() { | ||
this.quoteHeadersRegex = [ | ||
/^\-*\s*(On(?:(?!^>*\s*On\b|\bwrote(:)?)[\s\S]){0,1000}wrote:?)\s*\-*$/m, // On DATE, NAME <EMAIL> wrote: | ||
/^\s*(Le(?:(?!^>*\s*Le\b|\bécrit:)[\s\S]){0,1000}écrit :)$/m, // Le DATE, NAME <EMAIL> a écrit : | ||
/^\s*(El(?:(?!^>*\s*El\b|\bescribió:)[\s\S]){0,1000}escribió:)$/m, // El DATE, NAME <EMAIL> escribió: | ||
/^\s*(Il(?:(?!^>*\s*Il\b|\bscritto:)[\s\S]){0,1000}scritto:)$/m, // Il DATE, NAME <EMAIL> ha scritto: | ||
/^\s*(Em(?:(?!^>*\s*Em\b|\bescreveu:)[\s\S]){0,1000}escreveu:)$/m, // Em DATE, NAME <EMAIL>escreveu: | ||
/^\s*(Am\s.+\s)schrieb.+\s?(\[|<).+(\]|>):$/m, // Am DATE schrieb NAME <EMAIL>: | ||
/^\s*(Op\s[\s\S]+?schreef[\s\S]+:)$/m, // Il DATE, schreef NAME <EMAIL>: | ||
/^\s*((W\sdniu|Dnia)\s[\s\S]+?(pisze|napisał(\(a\))?):)$/mu, // W dniu DATE, NAME <EMAIL> pisze|napisał: | ||
/^\s*(Den\s.+\sskrev\s.+:)$/m, // Den DATE skrev NAME <EMAIL>: | ||
/^\s*(pe\s.+\s.+kirjoitti:)$/m, // pe DATE NAME <EMAIL> kirjoitti: | ||
/^\s*(Am\s.+\sum\s.+\sschrieb\s.+:)$/m, // Am DATE um TIME schrieb NAME: | ||
/^(在[\s\S]+写道:)$/m, // > 在 DATE, TIME, NAME 写道: | ||
/^(20[0-9]{2}\..+\s작성:)$/m, // DATE TIME NAME 작성: | ||
/^(20[0-9]{2}\/.+のメッセージ:)$/m, // DATE TIME、NAME のメッセージ: | ||
/^(.+\s<.+>\sschrieb:)$/m, // NAME <EMAIL> schrieb: | ||
/^(.+\son.*at.*wrote:)$/m, // NAME on DATE wrote: | ||
/^\s*(From\s?:.+\s?\n?\s*[\[|<].+[\]|>])/m, // "From: NAME <EMAIL>" OR "From : NAME <EMAIL>" OR "From : NAME<EMAIL>"(With support whitespace before start and before <) | ||
/^\s*(De\s?:.+\s?\n?\s*(\[|<).+(\]|>))/m, // "De: NAME <EMAIL>" OR "De : NAME <EMAIL>" OR "De : NAME<EMAIL>" (With support whitespace before start and before <) | ||
/^\s*(Van\s?:.+\s?\n?\s*(\[|<).+(\]|>))/m, // "Van: NAME <EMAIL>" OR "Van : NAME <EMAIL>" OR "Van : NAME<EMAIL>" (With support whitespace before start and before <) | ||
/^\s*(Da\s?:.+\s?\n?\s*(\[|<).+(\]|>))/m, // "Da: NAME <EMAIL>" OR "Da : NAME <EMAIL>" OR "Da : NAME<EMAIL>" (With support whitespace before start and before <) | ||
/^(20[0-9]{2})-([0-9]{2}).([0-9]{2}).([0-9]{2}):([0-9]{2})*.(.*)?\n?(.*)>:$/m, // 20YY-MM-DD HH:II GMT+01:00 NAME <EMAIL>: | ||
/^\s*([a-z]{3,4}\.\s[\s\S]+\sskrev\s[\s\S]+:)$/m, // DATE skrev NAME <EMAIL>: | ||
/^([0-9]{2}).([0-9]{2}).(20[0-9]{2})(.*)(([0-9]{2}).([0-9]{2}))(.*)\"( *)<(.*)>( *):$/m, // DD.MM.20YY HH:II NAME <EMAIL> | ||
]; | ||
this.signatureRegex = [ | ||
/^\s*-{2,4}$/, | ||
/^\s*_{2,4}$/, | ||
/^-- $/, | ||
/^-- \s*.+$/, | ||
/^________________________________$/, | ||
/^-{1,10}Original message-{1,10}$/, | ||
/^Sent from (?:\s*.+)$/, | ||
/^Von (?:\s*.+) gesendet$/, | ||
/^Envoyé depuis (?:\s*.+)$/, | ||
/^Enviado desde (?:\s*.+)$/, | ||
/^\+{2,4}$/, | ||
/^\={2,4}$/, | ||
/^Get Outlook for (?:\s*.+).*/m, | ||
/^Télécharger Outlook pour (?:\s*.+).*/m, | ||
/^\w{0,20}\s?Regards,?!?$/mi, | ||
/^Cheers,?!?$/mi, | ||
/^Best wishes,?!?$/mi, | ||
/^Bien . vous,?!?$/mi, | ||
/^\w{0,20}\s?cordialement,?!?$/mi,, | ||
]; | ||
this.fragments = []; | ||
@@ -140,3 +93,3 @@ } | ||
// remove any new lines that happen to match in the first capture group | ||
this.quoteHeadersRegex.forEach((regex) => { | ||
RegexList.quoteHeadersRegex.forEach((regex) => { | ||
let matches = newText.match(regex); | ||
@@ -156,7 +109,7 @@ if (matches) { | ||
getQuoteHeadersRegex() { | ||
return this.quoteHeadersRegex; | ||
return RegexList.quoteHeadersRegex; | ||
} | ||
setQuoteHeadersRegex(quoteHeadersRegex) { | ||
this.quoteHeadersRegex = quoteHeadersRegex; | ||
RegexList.quoteHeadersRegex = quoteHeadersRegex; | ||
@@ -184,3 +137,3 @@ return this; | ||
this.quoteHeadersRegex.forEach((regex) => { | ||
RegexList.quoteHeadersRegex.forEach((regex) => { | ||
if (regex.test(this.stringReverse(line))) { | ||
@@ -197,3 +150,3 @@ hasHeader = true; | ||
return this.signatureRegex.some((regex) => { | ||
return RegexList.signatureRegex.some((regex) => { | ||
return regex.test(text); | ||
@@ -200,0 +153,0 @@ }); |
{ | ||
"name": "email-reply-parser", | ||
"version": "1.2.6", | ||
"version": "1.3.0", | ||
"description": "Node library for parsing plain text email content. Based on https://github.com/willdurand/EmailReplyParser", | ||
@@ -11,6 +11,9 @@ "main": "lib/emailreplyparser.js", | ||
"license": "MIT", | ||
"dependencies": { | ||
"re2": "1.16.0" | ||
}, | ||
"devDependencies": { | ||
"nodeunit": "v0.11.1", | ||
"underscore": "v1.8.3" | ||
"underscore": "v1.12.1" | ||
} | ||
} |
@@ -5,3 +5,3 @@ # Email Reply Parser | ||
**Email Reply Parser is a library to parse plain-text email replies and extract content** | ||
**Email Reply Parser is a node library to parse plain-text email replies and extract content** | ||
@@ -31,5 +31,13 @@ This library supports most email replies, signatures and locales. | ||
## Features | ||
This library is used at [Crisp](https://crisp.chat/) every day with around 1 million inbound emails. Over the years, we improved this library so it can work with most emails. | ||
- Strip email replies like `On DATE, NAME <EMAIL> wrote:` | ||
- Supports around **10 locales**, including English, French, Spanish, Portuguese, Italian, Japanese, Chinese. | ||
- Removes signatures like `Sent from my iPhone` | ||
- Removes signatures like `Best wishes` | ||
## Usage | ||
``` javascript | ||
@@ -43,3 +51,2 @@ var EmailReplyParser = require("email-reply-parser"); | ||
## Contributing | ||
@@ -46,0 +53,0 @@ |
@@ -430,2 +430,25 @@ var fs = require("fs"); | ||
test.done(); | ||
} | ||
exports.test_email_fr_multiline = function(test) { | ||
let email = get_email("email_fr_multiline"); | ||
let fragments = email.getFragments(); | ||
test.equal(COMMON_FIRST_FRAGMENT, fragments[0].toString().trim()); | ||
test.equal(2, fragments.length); | ||
test.done(); | ||
} | ||
// Checks that the "email_en_multiline_2" fixture yields exactly two fragments
// and that the first one, once trimmed, equals COMMON_FIRST_FRAGMENT.
// NOTE(review): get_email is defined outside this view; presumably it loads
// and parses the named fixture into an Email object — confirm.
exports.test_email_en_multiline_2 = function(test) {
let email = get_email("email_en_multiline_2");
let fragments = email.getFragments();
// Compare the trimmed text of the first fragment against the shared expected reply.
test.equal(COMMON_FIRST_FRAGMENT, fragments[0].toString().trim());
// Exactly two fragments are expected for this fixture.
test.equal(2, fragments.length);
test.done();
} |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
89103
66
550
70
1
+ Addedre2@1.16.0
+ Added@gar/promisify@1.1.3(transitive)
+ Added@npmcli/fs@1.1.1(transitive)
+ Added@npmcli/move-file@1.1.2(transitive)
+ Added@tootallnate/once@1.1.2(transitive)
+ Addedabbrev@1.1.1(transitive)
+ Addedagent-base@6.0.2(transitive)
+ Addedagentkeepalive@4.6.0(transitive)
+ Addedaggregate-error@3.1.0(transitive)
+ Addedansi-regex@5.0.1(transitive)
+ Addedaproba@2.0.0(transitive)
+ Addedare-we-there-yet@3.0.1(transitive)
+ Addedbalanced-match@1.0.2(transitive)
+ Addedbrace-expansion@1.1.11(transitive)
+ Addedcacache@15.3.0(transitive)
+ Addedchownr@2.0.0(transitive)
+ Addedclean-stack@2.2.0(transitive)
+ Addedcolor-support@1.1.3(transitive)
+ Addedconcat-map@0.0.1(transitive)
+ Addedconsole-control-strings@1.1.0(transitive)
+ Addeddebug@4.4.0(transitive)
+ Addeddelegates@1.0.0(transitive)
+ Addedemoji-regex@8.0.0(transitive)
+ Addedencoding@0.1.13(transitive)
+ Addedenv-paths@2.2.1(transitive)
+ Addederr-code@2.0.3(transitive)
+ Addedfs-minipass@2.1.0(transitive)
+ Addedfs.realpath@1.0.0(transitive)
+ Addedgauge@4.0.4(transitive)
+ Addedglob@7.2.3(transitive)
+ Addedgraceful-fs@4.2.11(transitive)
+ Addedhas-unicode@2.0.1(transitive)
+ Addedhttp-cache-semantics@4.1.1(transitive)
+ Addedhttp-proxy-agent@4.0.1(transitive)
+ Addedhttps-proxy-agent@5.0.1(transitive)
+ Addedhumanize-ms@1.2.1(transitive)
+ Addediconv-lite@0.6.3(transitive)
+ Addedimurmurhash@0.1.4(transitive)
+ Addedindent-string@4.0.0(transitive)
+ Addedinfer-owner@1.0.4(transitive)
+ Addedinflight@1.0.6(transitive)
+ Addedinherits@2.0.4(transitive)
+ Addedinstall-artifact-from-github@1.3.5(transitive)
+ Addedip-address@9.0.5(transitive)
+ Addedis-fullwidth-code-point@3.0.0(transitive)
+ Addedis-lambda@1.0.1(transitive)
+ Addedisexe@2.0.0(transitive)
+ Addedjsbn@1.1.0(transitive)
+ Addedlru-cache@6.0.0(transitive)
+ Addedmake-fetch-happen@9.1.0(transitive)
+ Addedminimatch@3.1.2(transitive)
+ Addedminipass@3.3.6, 5.0.0(transitive)
+ Addedminipass-collect@1.0.2(transitive)
+ Addedminipass-fetch@1.4.1(transitive)
+ Addedminipass-flush@1.0.5(transitive)
+ Addedminipass-pipeline@1.2.4(transitive)
+ Addedminipass-sized@1.0.3(transitive)
+ Addedminizlib@2.1.2(transitive)
+ Addedmkdirp@1.0.4(transitive)
+ Addedms@2.1.3(transitive)
+ Addednan@2.22.0(transitive)
+ Addednegotiator@0.6.4(transitive)
+ Addednode-gyp@8.4.1(transitive)
+ Addednopt@5.0.0(transitive)
+ Addednpmlog@6.0.2(transitive)
+ Addedonce@1.4.0(transitive)
+ Addedp-map@4.0.0(transitive)
+ Addedpath-is-absolute@1.0.1(transitive)
+ Addedpromise-inflight@1.0.1(transitive)
+ Addedpromise-retry@2.0.1(transitive)
+ Addedre2@1.16.0(transitive)
+ Addedreadable-stream@3.6.2(transitive)
+ Addedretry@0.12.0(transitive)
+ Addedrimraf@3.0.2(transitive)
+ Addedsafe-buffer@5.2.1(transitive)
+ Addedsafer-buffer@2.1.2(transitive)
+ Addedsemver@7.6.3(transitive)
+ Addedset-blocking@2.0.0(transitive)
+ Addedsignal-exit@3.0.7(transitive)
+ Addedsmart-buffer@4.2.0(transitive)
+ Addedsocks@2.8.3(transitive)
+ Addedsocks-proxy-agent@6.2.1(transitive)
+ Addedsprintf-js@1.1.3(transitive)
+ Addedssri@8.0.1(transitive)
+ Addedstring-width@4.2.3(transitive)
+ Addedstring_decoder@1.3.0(transitive)
+ Addedstrip-ansi@6.0.1(transitive)
+ Addedtar@6.2.1(transitive)
+ Addedunique-filename@1.1.1(transitive)
+ Addedunique-slug@2.0.2(transitive)
+ Addedutil-deprecate@1.0.2(transitive)
+ Addedwhich@2.0.2(transitive)
+ Addedwide-align@1.1.5(transitive)
+ Addedwrappy@1.0.2(transitive)
+ Addedyallist@4.0.0(transitive)