From ca30f8aba531bc34a98eca2f8a8c3d59af499a8b Mon Sep 17 00:00:00 2001 From: Neil Date: Mon, 21 Jan 2019 16:32:18 -0500 Subject: [PATCH 01/16] Resolve #112 by adding "quotedDisplayName" detector (#114) --- header.go | 19 ++++++++++++++++++- header_test.go | 2 ++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/header.go b/header.go index 55039529..64fe012e 100644 --- a/header.go +++ b/header.go @@ -4,6 +4,7 @@ import ( "bufio" "bytes" stderrors "errors" + "fmt" "io" "mime" "net/textproto" @@ -170,7 +171,15 @@ func decodeToUTF8Base64Header(input string) string { return input } - tokens := strings.FieldsFunc(input, whiteSpaceRune) + // The standard lib performs an incremental inspection of this string, where the + // "skipSpace" method only strings.trimLeft for spaces and tabs. Here we have a + // hard dependency on space existing and not on next expected rune + // + // For resolving #112 with the least change, I will implement the + // "quoted display-name" detector, which will resolve the case specific + // issue stated in #112, but only in the case of a quoted display-name + // followed, without whitespace, by addr-spec. + tokens := strings.FieldsFunc(quotedDisplayName(input), whiteSpaceRune) output := make([]string, len(tokens)) for i, token := range tokens { if len(token) > 4 && strings.Contains(token, "=?") { @@ -196,6 +205,14 @@ func decodeToUTF8Base64Header(input string) string { return strings.Join(output, " ") } +func quotedDisplayName(s string) string { + if !strings.HasPrefix(s, "\"") { + return s + } + idx := strings.LastIndex(s, "\"") + return fmt.Sprintf("%s %s", s[:idx+1], s[idx+1:]) +} + // parseMediaType is a more tolerant implementation of Go's mime.ParseMediaType function. func parseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) { mtype, params, err = mime.ParseMediaType(ctype) diff --git a/header_test.go b/header_test.go index 81465d3d..95b2ec05 100644 --- a/header_test.go +++ b/header_test.go @@ -148,6 +148,8 @@ func TestDecodeToUTF8Base64Header(t *testing.T) { {"=?UTF-8?Q?Miros=C5=82aw?= ", "=?UTF-8?b?TWlyb3PFgmF3?= "}, {"First Last (=?iso-8859-1?q?#=a3_c=a9_r=ae_u=b5?=)", "First Last (=?UTF-8?b?I8KjIGPCqSBywq4gdcK1?=)"}, + // Quoted display name without space before angle-addr spec, Issue #112 + {"\"=?UTF-8?b?TWlyb3PFgmF3?=\"", "=?UTF-8?b?Ik1pcm9zxYJhdyI=?= "}, } for _, tt := range testTable { From fa02efdefce2ac8832b7381aaf9779b6160615d3 Mon Sep 17 00:00:00 2001 From: Neil Date: Mon, 21 Jan 2019 16:36:09 -0500 Subject: [PATCH 02/16] Updated "detectAttachmentHeader" doc comment to, more accurately, depict the function flow (#116) --- detect.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/detect.go b/detect.go index 01e2ef3e..32cb140f 100644 --- a/detect.go +++ b/detect.go @@ -18,10 +18,10 @@ func detectMultipartMessage(root *Part) bool { return strings.HasPrefix(mediatype, ctMultipartPrefix) } -// detectAttachmentHeader returns true, if the given header defines an attachment. First it checks -// if the Content-Disposition header defines an attachement or inline attachment. If this test is -// false, the Content-Type header is checked for attachment, but not inline. Email clients use -// inline for their text bodies. +// detectAttachmentHeader returns true, if the given header defines an attachment. First it checks +// if the Content-Disposition header defines either an attachment part or an inline part with at +// least one parameter. If this test is false, the Content-Type header is checked for attachment, +// but not inline. Email clients use inline for their text bodies. // // Valid Attachment-Headers: // From 7e27711c51bbbef44e8946b46020b20dcd64e889 Mon Sep 17 00:00:00 2001 From: Neil Date: Mon, 21 Jan 2019 16:45:53 -0500 Subject: [PATCH 03/16] Make ParseMediaType public (#115) --- detect.go | 10 +++++----- envelope.go | 4 ++-- header.go | 9 +++++++-- part.go | 4 ++-- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/detect.go b/detect.go index 32cb140f..8e027afa 100644 --- a/detect.go +++ b/detect.go @@ -9,7 +9,7 @@ import ( func detectMultipartMessage(root *Part) bool { // Parse top-level multipart ctype := root.Header.Get(hnContentType) - mediatype, _, _, err := parseMediaType(ctype) + mediatype, _, _, err := ParseMediaType(ctype) if err != nil { return false } @@ -29,13 +29,13 @@ func detectMultipartMessage(root *Part) bool { // - Content-Disposition: inline; filename="frog.jpg" // - Content-Type: attachment; filename="frog.jpg" func detectAttachmentHeader(header textproto.MIMEHeader) bool { - mediatype, params, _, _ := parseMediaType(header.Get(hnContentDisposition)) + mediatype, params, _, _ := ParseMediaType(header.Get(hnContentDisposition)) if strings.ToLower(mediatype) == cdAttachment || (strings.ToLower(mediatype) == cdInline && len(params) > 0) { return true } - mediatype, _, _, _ = parseMediaType(header.Get(hnContentType)) + mediatype, _, _, _ = ParseMediaType(header.Get(hnContentType)) return strings.ToLower(mediatype) == cdAttachment } @@ -48,7 +48,7 @@ func detectTextHeader(header textproto.MIMEHeader, emptyContentTypeIsText bool) return true } - mediatype, _, _, err := parseMediaType(ctype) + mediatype, _, _, err := ParseMediaType(ctype) if err != nil { return false } @@ -72,7 +72,7 @@ func detectBinaryBody(root *Part) bool { // 'text/plain' or 'text/html'. // Example: // Content-Type: application/pdf; name="doc.pdf" - mediatype, _, _, _ := parseMediaType(root.Header.Get(hnContentType)) + mediatype, _, _, _ := ParseMediaType(root.Header.Get(hnContentType)) mediatype = strings.ToLower(mediatype) if mediatype != ctTextPlain && mediatype != ctTextHTML { return true diff --git a/envelope.go b/envelope.go index 1721c6b0..f73e8a78 100644 --- a/envelope.go +++ b/envelope.go @@ -236,7 +236,7 @@ func parseTextOnlyBody(root *Part, e *Envelope) error { var charset string var isHTML bool if ctype := root.Header.Get(hnContentType); ctype != "" { - if mediatype, mparams, _, err := parseMediaType(ctype); err == nil { + if mediatype, mparams, _, err := ParseMediaType(ctype); err == nil { isHTML = (mediatype == ctTextHTML) if mparams[hpCharset] != "" { charset = mparams[hpCharset] @@ -275,7 +275,7 @@ func parseTextOnlyBody(root *Part, e *Envelope) error { func parseMultiPartBody(root *Part, e *Envelope) error { // Parse top-level multipart ctype := root.Header.Get(hnContentType) - mediatype, params, _, err := parseMediaType(ctype) + mediatype, params, _, err := ParseMediaType(ctype) if err != nil { return fmt.Errorf("Unable to parse media type: %v", err) } diff --git a/header.go b/header.go index 64fe012e..3d9dfabf 100644 --- a/header.go +++ b/header.go @@ -213,8 +213,13 @@ func quotedDisplayName(s string) string { return fmt.Sprintf("%s %s", s[:idx+1], s[idx+1:]) } -// parseMediaType is a more tolerant implementation of Go's mime.ParseMediaType function. -func parseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) { +// ParseMediaType is a more tolerant implementation of Go's mime.ParseMediaType function. +// +// Tolerances accounted for: +// * Missing ';' between content-type and media parameters +// * Repeating media parameters +// * Unquoted values in media parameters containing 'tspecials' characters +func ParseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) { mtype, params, err = mime.ParseMediaType(ctype) if err != nil { // Small hack to remove harmless charset duplicate params. diff --git a/part.go b/part.go index f7b5d5b5..e5b7fd9c 100644 --- a/part.go +++ b/part.go @@ -113,7 +113,7 @@ func (p *Part) setupHeaders(r *bufio.Reader, defaultContentType string) error { ctype = defaultContentType } // Parse Content-Type header. - mtype, mparams, minvalidParams, err := parseMediaType(ctype) + mtype, mparams, minvalidParams, err := ParseMediaType(ctype) if err != nil { return err } @@ -140,7 +140,7 @@ func (p *Part) setupHeaders(r *bufio.Reader, defaultContentType string) error { // the disposition, filename, and charset fields. func (p *Part) setupContentHeaders(mediaParams map[string]string) { // Determine content disposition, filename, character set. - disposition, dparams, _, err := parseMediaType(p.Header.Get(hnContentDisposition)) + disposition, dparams, _, err := ParseMediaType(p.Header.Get(hnContentDisposition)) if err == nil { // Disposition is optional p.Disposition = disposition From a9ec2f8a90ed8d9c7d3dc56423d5dc0a6afbfecb Mon Sep 17 00:00:00 2001 From: Neil Date: Thu, 24 Jan 2019 15:34:04 -0500 Subject: [PATCH 04/16] Touchup of RCF2047 mime attribute parameter decoding (#118) --- header.go | 9 ++++++++- header_test.go | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/header.go b/header.go index 3d9dfabf..a0075e24 100644 --- a/header.go +++ b/header.go @@ -510,6 +510,13 @@ func rfc2047AttributeName(s string) string { return s } pair := strings.SplitAfter(s, "?=") - pair[0] = decodeHeader(pair[0]) + // lets assume that the attribute was encoded because of unicode characters being present + // then the attribute value should be quoted + keyValuePair := strings.SplitAfter(decodeHeader(pair[0]), "=") + // only quote the parameter value if it isn't already quoted + if !strings.HasPrefix(keyValuePair[1], "\"") { + keyValuePair[1] = fmt.Sprintf("\"%s", keyValuePair[1]) + } + pair[0] = strings.Join(keyValuePair, "") return strings.Join(pair, "") } diff --git a/header_test.go b/header_test.go index 95b2ec05..0e8d484f 100644 --- a/header_test.go +++ b/header_test.go @@ -194,6 +194,11 @@ func TestFixMangledMediaType(t *testing.T) { sep: ";", want: "one/two; name=\"file.two\"", }, + { + input: "application/octet-stream; =?UTF-8?B?bmFtZT3DsMKfwpTCii5tc2c=?=", + sep: " ", + want: "application/octet-stream;name=\"🔊.msg", + }, { input: "one/two name=\"file.two\" name=\"file.two\"", sep: " ", From 4ec8ed26de0595bbc7a1e02545b48b302a78fe37 Mon Sep 17 00:00:00 2001 From: Neil Date: Thu, 24 Jan 2019 17:05:50 -0500 Subject: [PATCH 05/16] Make decoding failure on base64.CorruptInputError non-fatal (#119) * Make decoding failure on base64.CorruptInputError non-fatal, closes #117 --- envelope_test.go | 19 ++++++ part.go | 22 ++++++- .../mime-bad-content-transfer-encoding.raw | 66 +++++++++++++++++++ 3 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 testdata/mail/mime-bad-content-transfer-encoding.raw diff --git a/envelope_test.go b/envelope_test.go index 4df32edf..b0aa841f 100644 --- a/envelope_test.go +++ b/envelope_test.go @@ -900,6 +900,25 @@ func TestBadContentTypeInMime(t *testing.T) { } } +func TestBadContentTransferEncodingInMime(t *testing.T) { + msg := test.OpenTestData("mail", "mime-bad-content-transfer-encoding.raw") + e, err := enmime.ReadEnvelope(msg) + + if err != nil { + t.Fatal("Failed to parse MIME:", err) + } + + var expectedErrorPresent bool + for _, v := range e.Errors { + if v.Name == enmime.ErrorMalformedBase64 && v.Severe { + expectedErrorPresent = true + } + } + if !expectedErrorPresent { + t.Fatal("Mail should have a severe malformed base64 error") + } +} + func TestBlankMediaName(t *testing.T) { msg := test.OpenTestData("mail", "mime-blank-media-name.raw") e, err := enmime.ReadEnvelope(msg) diff --git a/part.go b/part.go index e5b7fd9c..0e17d0eb 100644 --- a/part.go +++ b/part.go @@ -284,13 +284,13 @@ func (p *Part) decodeContent(r io.Reader) error { var err error contentReader, err = p.convertFromDetectedCharset(contentReader) if err != nil { - return err + return p.base64CorruptInputCheck(err) } } // Decode and store content. content, err := ioutil.ReadAll(contentReader) if err != nil { - return errors.WithStack(err) + return p.base64CorruptInputCheck(errors.WithStack(err)) } p.Content = content // Collect base64 errors. @@ -306,6 +306,24 @@ func (p *Part) decodeContent(r io.Reader) error { return nil } +// base64CorruptInputCheck will avoid fatal failure on corrupt base64 input +// +// This is a switch on errors.Cause(err).(type) for base64.CorruptInputError +func (p *Part) base64CorruptInputCheck(err error) error { + switch errors.Cause(err).(type) { + case base64.CorruptInputError: + p.Content = nil + p.Errors = append(p.Errors, &Error{ + Name: ErrorMalformedBase64, + Detail: err.Error(), + Severe: true, + }) + return nil + default: + return err + } +} + // Clone returns a clone of the current Part. func (p *Part) Clone(parent *Part) *Part { if p == nil { diff --git a/testdata/mail/mime-bad-content-transfer-encoding.raw b/testdata/mail/mime-bad-content-transfer-encoding.raw new file mode 100644 index 00000000..8754ab45 --- /dev/null +++ b/testdata/mail/mime-bad-content-transfer-encoding.raw @@ -0,0 +1,66 @@ +Received: from HVD-Exch16-01. internal.contoso.dom (192.168.1.50) by + HVD-Exch16-01. internal.contoso.dom (192.168.1.50) with Microsoft SMTP + Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256_P256) id + 15.1.1591.10 via Mailbox Transport; Sun, 20 Jan 2019 14:33:43 -0600 +Message-ID: +Content-Type: multipart/alternative; boundary="ABOUNDARY.075224149159994985" +Subject: alert +Date: Sun, 20 Jan 2019 23:33:39 +0300 +From: example +To: example +Return-Path: doonglol@contoso.com +MIME-Version: 1.0 + +--ABOUNDARY.075224149159994985 +Content-Type: text/plain; format=flowed; charset="UTF-8" +Content-Transfer-Encoding: base64 + +ploozdect nispplul swunkglap zarplontruskdremp fruftsist +ploystoov smohsless cregzass smoosknuspgeftlut priptyasp +smoorbrooft pukstrooz pruhrept frorblilcroospbrod spakkess +ruzyoost toctswing grumsmach birflugjechfapt broostspust +nudyet changlig droontmunt chapplooypectsoy gafral +slipblept geskchant gluddrach prontcrundhaysnind spomdast +sahshooy yinzess snutgruch breyloossstrolflooy vuptsling +ploorlish hondcrud mebnush flendblutslawspint groowpromp +maspflid grashsmey ploozblust blanzafbumyuct gligplask +fasstross voongluf gluzzast luzflooctlerpach brishhonk +troowtrict flezskooft honkglok bloofjooshshuhtuz pogpreh +clinttrost strachlunk strehslonk flinflumcrashshipt prehhach +stooplet tucttoog prindskech bloopdrarhoomgont grabtesp +prilput froontnooch nubstrisk lectkampswulgrupt pangyef +spawsnef snoondshin noonskunk sperglikvimpcrag flizfan +frogplusk claspcrav flacttink blandpahcroskbropt plobnof +sniwmact flobskenk grotslish crosppampsooskslut dekprich +slitpruw smupwiw skikstint hostslaglinboh glovyev +groolshict mirbrod tooddih smendgloshtooctsmak nanclih +plantfloog plosszav chosksmek shechsnupfrashdik smooyhipt +skulclod glubsmak swakproog snostslahshoplan sledtroosk +wizpek voofwoss ploospmoow daskspongbrawbroop suhspat +jatnoft wofthah froolyup slootgankcreshglol pidshek +nimpchest smichmund clofflask lundzoonkroonsmiss pleskting +wechslapt yulfut charshisk jospharpoffroor flubfraw +smirslang konsmiy zivblad sterchoowpooyjik huctkul +blonghum vaptslooft gloostplemp puttagslesssuft shivvuh +strongslog craftjoob lidfreh rizskachbumloow kifkid +druksnoopt drumpclir govbluf siptgeptgrozstrooss glarhaft +flanyef zetkel kifspunk gefhewsnohpur spewskoosh +rafgob zoochbiw foptgav fuyyonkbrictspob plenrimp +fruchbremp brooshchep smafbluh shoochcrakkangstrub moontdoomp +glooctclog jiffrint droozkuz zopprumpspaftheb joofstub +blidguss deshdrect gromstasp clebtusppruskgov dudtrosk +prudskil wengdesp stransloh droobglizhondsmop dewspook +vedsosp trempwusp siyfrool kuchstrissnookflest croptgum +leshkess ploobfrey blintsung rengjictbrushgrooss jiptwiv +kugstug bruchkop giftchir piwclisksloozhist frispgech +glactspang cruvvob prushswach plegpitwoospvow chuzgrunt + +--ABOUNDARY.075224149159994985 +Content-Type: text/html; charset="utf-8" +Content-Transfer-Encoding: base64 + +PG1ldGEgaHR0cC1lcXVpdj0iQ29udGVudC1UeXBlIiBjb250ZW50PSJ0ZXh0L2h0bWw7IGNoYXJz +ZXQ9dXRmLTgiPgoKPGJyPgpUaGlzIGlzIGFuIGV4YW1wbGUKPGJyPg== + + +--ABOUNDARY.075224149159994985-- From 32384ef9fb4bf35ba74c859d7aeea172f62db744 Mon Sep 17 00:00:00 2001 From: Neil Date: Fri, 25 Jan 2019 11:27:37 -0500 Subject: [PATCH 06/16] guard for index out of bounds in rfc2047AttributeName (#120) --- header.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/header.go b/header.go index a0075e24..f8c5ccbd 100644 --- a/header.go +++ b/header.go @@ -514,8 +514,10 @@ func rfc2047AttributeName(s string) string { // then the attribute value should be quoted keyValuePair := strings.SplitAfter(decodeHeader(pair[0]), "=") // only quote the parameter value if it isn't already quoted - if !strings.HasPrefix(keyValuePair[1], "\"") { - keyValuePair[1] = fmt.Sprintf("\"%s", keyValuePair[1]) + if len(keyValuePair) > 1 { + if !strings.HasPrefix(keyValuePair[1], "\"") { + keyValuePair[1] = fmt.Sprintf("\"%s", keyValuePair[1]) + } } pair[0] = strings.Join(keyValuePair, "") return strings.Join(pair, "") From afba0ddb66823e36e06b0c28b84e789b12137219 Mon Sep 17 00:00:00 2001 From: Neil Date: Sat, 26 Jan 2019 13:18:06 -0500 Subject: [PATCH 07/16] Increase of code test coverage (#122) --- cmd/example_test.go | 35 ++++++- internal/coding/charsets_test.go | 27 ++++++ internal/test/testing_test.go | 153 +++++++++++++++++++++++++++++++ 3 files changed, 214 insertions(+), 1 deletion(-) diff --git a/cmd/example_test.go b/cmd/example_test.go index 91a33cbb..acd76783 100644 --- a/cmd/example_test.go +++ b/cmd/example_test.go @@ -24,6 +24,34 @@ Text section. Content-Type: text/html HTML section. +--Enmime-Test-100 +Content-Transfer-Encoding: base64 +Content-Disposition: inline; + filename=favicon.png +Content-Type: image/png; + x-unix-mode=0644; + name="favicon.png" +Content-Id: <8B8481A2-25CA-4886-9B5A-8EB9115DD064@skynet> + +iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ +bWFnZVJlYWR5ccllPAAAAlFJREFUeNqUU8tOFEEUPVVdNV3dPe8xYRBnjGhmBgKjKzCIiQvBoIaN +bly5Z+PSv3Aj7DSiP2B0rwkLGVdGgxITSCRIJGSMEQWZR3eVt5sEFBgTb/dN1yvnnHtPNTPG4Pqd +HgCMXnPRSZrpSuH8vUJu4DE4rYHDGAZDX62BZttHqTiIayM3gGiXQsgYLEvATaqxU+dy1U13YXap +XptpNHY8iwn8KyIAzm1KBdtRZWErpI5lEWTXp5Z/vHpZ3/wyKKwYGGOdAYwR0EZwoezTYApBEIOb +yELl/aE1/83cp40Pt5mxqCKrE4Ck+mVWKKcI5tA8BLEhRBKJLjez6a7MLq7XZtp+yyOawwCBtkiB +VZDKzRk4NN7NQBMYPHiZDFhXY+p9ff7F961vVcnl4R5I2ykJ5XFN7Ab7Gc61VoipNBKF+PDyztu5 +lfrSLT/wIwCxq0CAGtXHZTzqR2jtwQiXONma6hHpj9sLT7YaPxfTXuZdBGA02Wi7FS48YiTfj+i2 +NhqtdhP5RC8mh2/Op7y0v6eAcWVLFT8D7kWX5S9mepp+C450MV6aWL1cGnvkxbwHtLW2B9AOkLeU +d9KEDuh9fl/7CEj7YH5g+3r/lWfF9In7tPz6T4IIwBJOr1SJyIGQMZQbsh5P9uBq5VJtqHh2mo49 +pdw5WFoEwKWqWHacaWOjQXWGcifKo6vj5RGS6zykI587XeUIQDqJSmAp+lE4qt19W5P9o8+Lma5D +cjsC8JiT607lMVkdqQ0Vyh3lHhmh52tfNy78ajXv0rgYzv8nfwswANuk+7sD/Q0aAAAAAElFTkSu +QmCC +--Enmime-Test-100 +Content-Transfer-Encoding: base64 +Content-Type: text/html; name="test.html" +Content-Disposition: attachment; filename=test.html + +PGh0bWw+Cg== --Enmime-Test-100-- ` // Convert MIME text to Envelope @@ -66,14 +94,19 @@ Content-Type: text/html // HTML section. // // ## Attachment List + // - test.html (text/html) // // ## Inline List + // - favicon.png (image/png) + // Content-ID: 8B8481A2-25CA-4886-9B5A-8EB9115DD064@skynet // // ## Other Part List // // ## MIME Part Tree // multipart/mixed // |-- text/plain - // `-- text/html + // |-- text/html + // |-- image/png, disposition: inline, filename: "favicon.png" + // `-- text/html, disposition: attachment, filename: "test.html" // } diff --git a/internal/coding/charsets_test.go b/internal/coding/charsets_test.go index f06055a7..57a4ad23 100644 --- a/internal/coding/charsets_test.go +++ b/internal/coding/charsets_test.go @@ -69,3 +69,30 @@ func TestFindCharsetInHTML(t *testing.T) { } } } + +func TestConvertToUTF8String(t *testing.T) { + var testTable = []struct { + charset string + input []byte + want string + }{ + {"utf-8", []byte("abcABC\u2014"), "abcABC\u2014"}, + {"windows-1250", []byte{'a', 'Z', 0x96}, "aZ\u2013"}, + {"big5", []byte{0xa1, 0x5d, 0xa1, 0x61, 0xa1, 0x71}, "\uff08\uff5b\u3008"}, + } + // Success Conditions + for _, v := range testTable { + s, err := coding.ConvertToUTF8String(v.charset, v.input) + if err != nil { + t.Error("UTF-8 conversion failed") + } + if s != v.want { + t.Errorf("Got %s, but wanted %s", s, v.want) + } + } + // Fail for unsupported charset + _, err := coding.ConvertToUTF8String("123", []byte("there is no 123 charset")) + if err == nil { + t.Error("Charset 123 should not exist") + } +} diff --git a/internal/test/testing_test.go b/internal/test/testing_test.go index a0599fba..c921af00 100644 --- a/internal/test/testing_test.go +++ b/internal/test/testing_test.go @@ -1,6 +1,8 @@ package test import ( + "os" + "path/filepath" "testing" "github.com/jhillyerd/enmime" @@ -102,3 +104,154 @@ func TestHelperComparePartsInequal(t *testing.T) { }) } } + +// TestOpenTestDataPanic verifies that this function will panic as predicted +func TestOpenTestDataPanic(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Error("OpenTestData did not panic") + } + }() + _ = OpenTestData("invalidDir", "invalidFile") +} + +// TestOpenTestData ensures that the returned io.Reader has the correct underlying type, that +// the file descriptor referenced is a directory and that we have permission to read it +func TestOpenTestData(t *testing.T) { + // this will open a handle to the "testdata" directory + r := OpenTestData("", "") + if r == nil { + t.Error("The returned io.Reader should not be nil") + } + + osFilePtr, ok := r.(*os.File) + if !ok { + t.Errorf("Underlying type should be *os.File, but got %T instead", r) + } + + info, err := osFilePtr.Stat() + if err != nil { + t.Error("We should have read permission for \"testdata\" directory") + } + + if !info.IsDir() { + t.Error("File descriptor labeled \"testdata\" should be a directory") + } +} + +// TestContentContainsString checks if the string contains a provided sub-string +func TestContentContainsString(t *testing.T) { + // Success + ContentContainsString(t, []byte("someString"), "some") + // Failure + ContentContainsString(&testing.T{}, []byte("someString"), "nope") +} + +// TestContentEqualsString checks if the strings are equal +func TestContentEqualsString(t *testing.T) { + // Success + ContentEqualsString(t, []byte("someString"), "someString") + // Failure + ContentEqualsString(&testing.T{}, []byte("someString"), "nope") +} + +// TestContentEqualsBytes checks if the slices of bytes are equal +func TestContentEqualsBytes(t *testing.T) { + // Success + ContentEqualsBytes(t, []byte("someString"), []byte("someString")) + // Failure + ContentEqualsBytes(&testing.T{}, []byte("someString"), []byte("nope")) +} + +// TestCompareEnvelope checks all publicly accessible members of an envelope for differences +func TestCompareEnvelope(t *testing.T) { + fileA, err := os.Open(filepath.Join("..", "..", "testdata", "mail", "attachment.raw")) + if err != nil { + t.Error(err) + } + envelopeA, err := enmime.ReadEnvelope(fileA) + if err != nil { + t.Error(err) + } + + // Success + success := CompareEnvelope(t, envelopeA, envelopeA) + if !success { + t.Error("Same file should have identical envelopes") + } + + // Success on "want" and "got" nil + success = CompareEnvelope(t, nil, nil) + if !success { + t.Error("Comparing nil to nil should result in true") + } + + // Fail on "got" nil + success = CompareEnvelope(&testing.T{}, nil, envelopeA) + if success { + t.Error("Got is nil, envelopeA should not be the same") + } + + // Fail on "want" nil + success = CompareEnvelope(&testing.T{}, envelopeA, nil) + if success { + t.Error("Want is nil, envelopeA should not be the same") + } + + // Fail on root Part mismatch nil + envelopeB := *envelopeA + envelopeB.Root = nil + success = CompareEnvelope(&testing.T{}, envelopeA, &envelopeB) + if success { + t.Error("Envelope Root parts should not be the same") + } + envelopeB.Root = envelopeA.Root + + // Fail on Text mismatch + envelopeB.Text = "mismatch" + success = CompareEnvelope(&testing.T{}, envelopeA, &envelopeB) + if success { + t.Error("Envelope Text parts should not be the same") + } + envelopeB.Text = envelopeA.Text + + // Fail on HTML mismatch + envelopeB.HTML = "mismatch" + success = CompareEnvelope(&testing.T{}, envelopeA, &envelopeB) + if success { + t.Error("Envelope HTML parts should not be the same") + } + envelopeB.HTML = envelopeA.HTML + + // Fail on Attachment count mismatch + envelopeB.Attachments = append(envelopeB.Attachments, &enmime.Part{}) + success = CompareEnvelope(&testing.T{}, envelopeA, &envelopeB) + if success { + t.Error("Envelope Attachment slices should not be the same") + } + envelopeB.Attachments = envelopeA.Attachments + + // Fail on Inlines count mismatch + envelopeB.Inlines = append(envelopeB.Inlines, &enmime.Part{}) + success = CompareEnvelope(&testing.T{}, envelopeA, &envelopeB) + if success { + t.Error("Envelope Inlines slices should not be the same") + } + envelopeB.Inlines = envelopeA.Inlines + + // Fail on OtherParts count mismatch + envelopeB.OtherParts = append(envelopeB.OtherParts, &enmime.Part{}) + success = CompareEnvelope(&testing.T{}, envelopeA, &envelopeB) + if success { + t.Error("Envelope OtherParts slices should not be the same") + } + envelopeB.OtherParts = envelopeA.OtherParts + + // Fail on Errors count mismatch + envelopeB.Errors = append(envelopeB.Errors, &enmime.Error{}) + success = CompareEnvelope(&testing.T{}, envelopeA, &envelopeB) + if success { + t.Error("Envelope Errors slices should not be the same") + } + envelopeB.Errors = envelopeA.Errors +} From 8df923ed6f26f2982fcb62e4f4a8432369ce55fd Mon Sep 17 00:00:00 2001 From: Neil Date: Mon, 28 Jan 2019 11:07:28 -0500 Subject: [PATCH 08/16] Implementation which does not depend on successive function calls (#121) * Refactor rfc2047AttributeName to handle n-number of occurrences and appropriately manage quoting the final parameter value * Check for missing token after slash, apply defaults if true --- header.go | 77 ++++++++++++++++++++++++++++++++++++++++---------- header_test.go | 9 ++++-- 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/header.go b/header.go index f8c5ccbd..a4a946c8 100644 --- a/header.go +++ b/header.go @@ -20,11 +20,13 @@ const ( cdInline = "inline" // Standard MIME content types + ctAppPrefix = "application/" ctAppOctetStream = "application/octet-stream" ctMultipartAltern = "multipart/alternative" ctMultipartMixed = "multipart/mixed" ctMultipartPrefix = "multipart/" ctMultipartRelated = "multipart/related" + ctTextPrefix = "text/" ctTextPlain = "text/plain" ctTextHTML = "text/html" @@ -269,6 +271,20 @@ func fixMangledMediaType(mtype, sep string) string { // The content type is completely missing. Put in a placeholder. p = ctPlaceholder } + // Check for missing token after slash + if strings.HasSuffix(p, "/") { + switch p { + case ctTextPrefix: + p = ctTextPlain + case ctAppPrefix: + p = ctAppOctetStream + case ctMultipartPrefix: + p = ctMultipartMixed + default: + // Safe default + p = ctAppOctetStream + } + } default: if !strings.Contains(p, "=") { p = p + "=" + pvPlaceholder @@ -277,8 +293,8 @@ func fixMangledMediaType(mtype, sep string) string { // RFC-2047 encoded attribute name p = rfc2047AttributeName(p) - pair := strings.Split(p, "=") - if strings.Contains(mtype, pair[0]+"=") { + pair := strings.SplitAfter(p, "=") + if strings.Contains(mtype, pair[0]) { // Ignore repeated parameters. continue } @@ -505,20 +521,51 @@ func whiteSpaceRune(r rune) bool { // rfc2047AttributeName checks if the attribute name is encoded in RFC2047 format // RFC2047 Example: // `=?UTF-8?B?bmFtZT0iw7DCn8KUwoo=?=` -func rfc2047AttributeName(s string) string { - if !strings.Contains(s, "?=") { - return s +func rfc2047AttributeName(name string) string { + if !strings.Contains(name, "?=") { + return name } - pair := strings.SplitAfter(s, "?=") - // lets assume that the attribute was encoded because of unicode characters being present - // then the attribute value should be quoted - keyValuePair := strings.SplitAfter(decodeHeader(pair[0]), "=") - // only quote the parameter value if it isn't already quoted - if len(keyValuePair) > 1 { - if !strings.HasPrefix(keyValuePair[1], "\"") { - keyValuePair[1] = fmt.Sprintf("\"%s", keyValuePair[1]) + // copy the string so we can return the original if we encounter any issues + s := name + + // handle n-number of RFC2047 chunk occurrences + count := strings.Count(name, "?=") + result := &strings.Builder{} + var beginning, ending int + for i := 0; i < count; i++ { + beginning = strings.Index(s, "=?") + ending = strings.Index(s, "?=") + + if beginning == -1 || ending == -1 { + // the RFC2047 chunk is either malformed or is not an RFC2047 chunk + return name + } + + _, err := result.WriteString(s[:beginning]) + if err != nil { + return name + } + _, err = result.WriteString(decodeHeader(s[beginning : ending+2])) + if err != nil { + return name } + + s = s[ending+2:] + } + _, err := result.WriteString(s) + if err != nil { + return name + } + keyValuePair := strings.SplitAfter(result.String(), "=") + if len(keyValuePair) < 2 { + return result.String() + } + // Add quotes as needed + if !strings.HasPrefix(keyValuePair[1], "\"") { + keyValuePair[1] = fmt.Sprintf("\"%s", keyValuePair[1]) + } + if !strings.HasSuffix(keyValuePair[1], "\"") { + keyValuePair[1] = fmt.Sprintf("%s\"", keyValuePair[1]) } - pair[0] = strings.Join(keyValuePair, "") - return strings.Join(pair, "") + return strings.Join(keyValuePair, "") } diff --git a/header_test.go b/header_test.go index 0e8d484f..d6abcd28 100644 --- a/header_test.go +++ b/header_test.go @@ -182,7 +182,12 @@ func TestFixMangledMediaType(t *testing.T) { { input: "application/octet-stream;=?UTF-8?B?bmFtZT0iw7DCn8KUwoo=?=You've got a new voice miss call.msg", sep: ";", - want: "application/octet-stream;name=\"ð\u009f\u0094\u008aYou've got a new voice miss call.msg", + want: "application/octet-stream;name=\"ð\u009f\u0094\u008aYou've got a new voice miss call.msg\"", + }, + { + input: "application/; name=\"Voice message from =?UTF-8?B?4piOICsxIDI1MS0yNDUtODA0NC5tc2c=?=\";", + sep: ";", + want: "application/octet-stream; name=\"Voice message from ☎ +1 251-245-8044.msg\"", }, { input: "application/pdf name=\"file.pdf\"", @@ -197,7 +202,7 @@ func TestFixMangledMediaType(t *testing.T) { { input: "application/octet-stream; =?UTF-8?B?bmFtZT3DsMKfwpTCii5tc2c=?=", sep: " ", - want: "application/octet-stream;name=\"🔊.msg", + want: "application/octet-stream;name=\"🔊.msg\"", }, { input: "one/two name=\"file.two\" name=\"file.two\"", From b73de15654e44a27620a079d44a2dda89a1480f9 Mon Sep 17 00:00:00 2001 From: Neil Date: Mon, 11 Feb 2019 22:46:46 -0500 Subject: [PATCH 09/16] Requaos/nontermboundry (#123) * Refactor boundaryReader and parseParts to be more resilient * When a part is not terminated with a boundary, the boundaryReader will read until the end into the part content * The .Read() method of the boundaryReader only sets an internal flag when a part is unbounded so that \"setupHeaders()\" will perform as normal. This flag is read in \"parseParts()\" to set a warning on the Errors slice * declare error using standard errors package for proper stacktrace reporting * use recently released errors functionality "errors.WithMessagef()" * Add test case for non-crlf whitespace after boundary --- boundary.go | 212 +++++++++++------- boundary_test.go | 9 +- go.mod | 2 +- part.go | 11 +- part_test.go | 35 +++ .../parts/multimixed-no-closing-boundary.raw | 12 + 6 files changed, 187 insertions(+), 94 deletions(-) create mode 100644 testdata/parts/multimixed-no-closing-boundary.raw diff --git a/boundary.go b/boundary.go index 884f5ef3..3df064e7 100644 --- a/boundary.go +++ b/boundary.go @@ -3,8 +3,10 @@ package enmime import ( "bufio" "bytes" + stderrors "errors" "io" "io/ioutil" + "unicode" "github.com/pkg/errors" ) @@ -14,14 +16,18 @@ import ( // from it. const peekBufferSize = 4096 +var errNoBoundaryTerminator = stderrors.New("expected boundary not present") + type boundaryReader struct { - finished bool // No parts remain when finished - partsRead int // Number of parts read thus far - r *bufio.Reader // Source reader - nlPrefix []byte // NL + MIME boundary prefix - prefix []byte // MIME boundary prefix - final []byte // Final boundary prefix - buffer *bytes.Buffer // Content waiting to be read + finished bool // No parts remain when finished + partsRead int // Number of parts read thus far + r *bufio.Reader // Source reader + nlPrefix []byte // NL + MIME boundary prefix + prefix []byte // MIME boundary prefix + final []byte // Final boundary prefix + buffer *bytes.Buffer // Content waiting to be read + crBoundaryPrefix bool // Flag for CR in CRLF + MIME boundary + unbounded bool // Flag to throw errNoBoundaryTerminator } // newBoundaryReader returns an initialized boundaryReader @@ -37,48 +43,126 @@ func newBoundaryReader(reader *bufio.Reader, boundary string) *boundaryReader { } // Read returns a buffer containing the content up until boundary +// +// Excerpt from io package on io.Reader implementations: +// +// type Reader interface { +// Read(p []byte) (n int, err error) +// } +// +// Read reads up to len(p) bytes into p. It returns the number of +// bytes read (0 <= n <= len(p)) and any error encountered. Even +// if Read returns n < len(p), it may use all of p as scratch space +// during the call. If some data is available but not len(p) bytes, +// Read conventionally returns what is available instead of waiting +// for more. +// +// When Read encounters an error or end-of-file condition after +// successfully reading n > 0 bytes, it returns the number of bytes +// read. It may return the (non-nil) error from the same call or +// return the error (and n == 0) from a subsequent call. An instance +// of this general case is that a Reader returning a non-zero number +// of bytes at the end of the input stream may return either err == EOF +// or err == nil. The next Read should return 0, EOF. +// +// Callers should always process the n > 0 bytes returned before +// considering the error err. Doing so correctly handles I/O errors +// that happen after reading some bytes and also both of the allowed +// EOF behaviors. func (b *boundaryReader) Read(dest []byte) (n int, err error) { if b.buffer.Len() >= len(dest) { - // This read request can be satisfied entirely by the buffer + // This read request can be satisfied entirely by the buffer. return b.buffer.Read(dest) } - peek, err := b.r.Peek(peekBufferSize) - peekEOF := (err == io.EOF) - if err != nil && !peekEOF && err != bufio.ErrBufferFull { - // Unexpected error - return 0, errors.WithStack(err) - } - var nCopy int - idx, complete := locateBoundary(peek, b.nlPrefix) - if idx != -1 { - // Peeked boundary prefix, read until that point - nCopy = idx - if !complete && nCopy == 0 { - // Incomplete boundary, move past it - nCopy = 1 + for i := 0; i < cap(dest); i++ { + cs, err := b.r.Peek(1) + if err != nil && err != io.EOF { + return 0, errors.WithStack(err) } - } else { - // No boundary found, move forward a safe distance - if nCopy = len(peek) - len(b.nlPrefix) - 1; nCopy <= 0 { - nCopy = 0 - if peekEOF { - // No more peek space remaining and no boundary found - return 0, errors.WithStack(io.ErrUnexpectedEOF) + // Ensure that we can switch on the first byte of 'cs' without panic. + if len(cs) > 0 { + switch cs[0] { + // Check for line feed as potential LF boundary prefix. + case '\n': + peek, err := b.r.Peek(len(b.nlPrefix) + 2) + switch err { + case nil: + // Check the whitespace at the head of the peek to avoid checking for a boundary early. + if bytes.HasPrefix(peek, []byte("\n\n")) || + bytes.HasPrefix(peek, []byte("\n\r")) { + break + } + // Check the peek buffer for a boundary delimiter or terminator. + if b.isDelimiter(peek[1:]) || b.isTerminator(peek[1:]) { + // Check if we stored a carriage return. + if b.crBoundaryPrefix { + b.crBoundaryPrefix = false + // Let us now unread that back onto the io.Reader, since + // we have found what we are looking for and this byte + // belongs to the bounded block we are reading. + err = b.r.UnreadByte() + switch err { + case nil: + // Carry on. + case bufio.ErrInvalidUnreadByte: + // Carriage return boundary prefix bit already unread. + default: + return 0, errors.WithStack(err) + } + } + // We have found our boundary terminator, lets write out the final bytes + // and return io.EOF to indicate that this section read is complete. + n, err = b.buffer.Read(dest) + switch err { + case nil, io.EOF: + return n, io.EOF + default: + return 0, errors.WithStack(err) + } + } + case io.EOF: + // We have reached the end without finding a boundary, + // so we flag the boundary reader to add an error to + // the errors slice and write what we have to the buffer. + b.unbounded = true + default: + continue + } + // Checked '\n' was not prefix to a boundary. + if b.crBoundaryPrefix { + b.crBoundaryPrefix = false + // Stored '\r' should be written to the buffer now. + err = b.buffer.WriteByte('\r') + if err != nil { + return 0, errors.WithStack(err) + } + } + // Check for carriage return as potential CRLF boundary prefix. + case '\r': + _, err := b.r.ReadByte() + if err != nil { + return 0, errors.WithStack(err) + } + // Flag the boundary reader to indicate that we + // have stored a '\r' as a potential CRLF prefix. + b.crBoundaryPrefix = true + continue } } - } - if nCopy > 0 { - if _, err = io.CopyN(b.buffer, b.r, int64(nCopy)); err != nil { - return 0, errors.WithStack(err) + + _, err = io.CopyN(b.buffer, b.r, 1) + if err != nil { + // EOF is not fatal, it just means that we have drained the reader. + if errors.Cause(err) == io.EOF { + break + } + return 0, err } } + // Read the contents of the buffer into the destination slice. n, err = b.buffer.Read(dest) - if err == io.EOF && !complete { - // Only the buffer is empty, not the boundaryReader - return n, nil - } return n, err } @@ -88,7 +172,7 @@ func (b *boundaryReader) Next() (bool, error) { return false, nil } if b.partsRead > 0 { - // Exhaust the current part to prevent errors when moving to the next part + // Exhaust the current part to prevent errors when moving to the next part. _, _ = io.Copy(ioutil.Discard, b) } for { @@ -105,21 +189,21 @@ func (b *boundaryReader) Next() (bool, error) { return false, nil } if err != io.EOF && b.isDelimiter(line) { - // Start of a new part + // Start of a new part. b.partsRead++ return true, nil } if err == io.EOF { - // Intentionally not wrapping with stack + // Intentionally not wrapping with stack. return false, io.EOF } if b.partsRead == 0 { // The first part didn't find the starting delimiter, burn off any preamble in front of - // the boundary + // the boundary. continue } b.finished = true - return false, errors.Errorf("expecting boundary %q, got %q", string(b.prefix), string(line)) + return false, errors.WithMessagef(errNoBoundaryTerminator, "expecting boundary %q, got %q", string(b.prefix), string(line)) } } @@ -130,11 +214,10 @@ func (b *boundaryReader) isDelimiter(buf []byte) bool { return false } - // Fast forward to the end of the boundary prefix + // Fast forward to the end of the boundary prefix. buf = buf[idx+len(b.prefix):] - buf = bytes.TrimLeft(buf, " \t") if len(buf) > 0 { - if buf[0] == '\r' || buf[0] == '\n' { + if unicode.IsSpace(rune(buf[0])) { return true } } @@ -147,42 +230,3 @@ func (b *boundaryReader) isTerminator(buf []byte) bool { idx := bytes.Index(buf, b.final) return idx != -1 } - -// Locate boundaryPrefix in buf, returning its starting idx. If complete is true, the boundary -// is terminated properly in buf, otherwise it could be false due to running out of buffer, or -// because it is not the actual boundary. -// -// Complete boundaries end in "--" or a newline -func locateBoundary(buf, boundaryPrefix []byte) (idx int, complete bool) { - bpLen := len(boundaryPrefix) - idx = bytes.Index(buf, boundaryPrefix) - if idx == -1 { - return - } - - // Handle CR if present - if idx > 0 && buf[idx-1] == '\r' { - idx-- - bpLen++ - } - - // Fast forward to the end of the boundary prefix - buf = buf[idx+bpLen:] - if len(buf) == 0 { - // Need more bytes to verify completeness - return - } - if len(buf) > 1 { - if buf[0] == '-' && buf[1] == '-' { - return idx, true - } - } - buf = bytes.TrimLeft(buf, " \t") - if len(buf) > 0 { - if buf[0] == '\r' || buf[0] == '\n' { - return idx, true - } - } - - return -} diff --git a/boundary_test.go b/boundary_test.go index 38a6adbe..dafa9ebe 100644 --- a/boundary_test.go +++ b/boundary_test.go @@ -18,6 +18,11 @@ func TestBoundaryReader(t *testing.T) { boundary: "STOPHERE", want: "good", }, + { + input: "good\r\n--STOPHERE\t\r\nafter", + boundary: "STOPHERE", + want: "good", + }, { input: "good\r\n--STOPHERE--\r\nafter", boundary: "STOPHERE", @@ -290,8 +295,8 @@ func TestBoundaryReaderNoTerminator(t *testing.T) { t.Fatal("Next() = false, want: true") } - // Second part should error - want := "expecting boundary" + // There is no second part should, error should be EOF. + want := "EOF" next, err = br.Next() if err == nil { t.Fatal("Error was nil, wanted:", want) diff --git a/go.mod b/go.mod index 94364c05..3d9a8fec 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0 github.com/mattn/go-runewidth v0.0.3 // indirect github.com/olekukonko/tablewriter v0.0.0-20180912035003-be2c049b30cc // indirect - github.com/pkg/errors v0.8.0 + github.com/pkg/errors v0.8.1 github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect golang.org/x/net v0.0.0-20181017193950-04a2e542c03f // indirect diff --git a/part.go b/part.go index 0e17d0eb..070e3789 100644 --- a/part.go +++ b/part.go @@ -384,6 +384,10 @@ func parseParts(parent *Part, reader *bufio.Reader) error { if err != nil && errors.Cause(err) != io.EOF { return err } + if br.unbounded { + parent.addWarning(ErrorMissingBoundary, "Boundary %q was not closed correctly", + parent.Boundary) + } if !next { break } @@ -401,13 +405,6 @@ func parseParts(parent *Part, reader *bufio.Reader) error { // Empty header probably means the part didn't use the correct trailing "--" syntax to // close its boundary. if _, err = br.Next(); err != nil { - if errors.Cause(err) == io.EOF || strings.HasSuffix(err.Error(), "EOF") { - // There are no more Parts. The error must belong to the parent, because this - // part doesn't exist. - parent.addWarning(ErrorMissingBoundary, "Boundary %q was not closed correctly", - parent.Boundary) - break - } // The error is already wrapped with a stack, so only adding a message here. // TODO: Once `errors` releases a version > v0.8.0, change to use errors.WithMessagef() return errors.WithMessage(err, fmt.Sprintf("error at boundary %v", parent.Boundary)) diff --git a/part_test.go b/part_test.go index ba187754..9e51a751 100644 --- a/part_test.go +++ b/part_test.go @@ -715,6 +715,7 @@ func TestBadBoundaryTerm(t *testing.T) { p = p.NextSibling wantp = &enmime.Part{ Parent: test.PartExists, + NextSibling: test.PartExists, ContentType: "text/html", Charset: "us-ascii", PartID: "2", @@ -818,6 +819,40 @@ func TestContentTypeParamUnquotedSpecial(t *testing.T) { test.ComparePart(t, p, wantp) } +func TestNoClosingBoundary(t *testing.T) { + r := test.OpenTestData("parts", "multimixed-no-closing-boundary.raw") + p, err := enmime.ReadParts(r) + if err != nil { + t.Errorf("%+v", err) + } + if p == nil { + t.Fatal("Expected part but got nil") + } + + wantp := &enmime.Part{ + Parent: test.PartExists, + PartID: "1", + ContentType: "text/html", + Charset: "UTF-8", + } + test.ComparePart(t, p.FirstChild, wantp) + t.Log(string(p.FirstChild.Content)) + + expected := "Missing Boundary" + hasCorrectError := false + for _, v := range p.Errors { + if v.Severe { + t.Errorf("Expected Severe to be false, got true for %q", v.Name) + } + if v.Name == expected { + hasCorrectError = true + } + } + if !hasCorrectError { + t.Fatalf("Did not find expected error on part. Expected %q but got %v", expected, p.Errors) + } +} + func TestContentTypeParamMissingClosingQuote(t *testing.T) { r := test.OpenTestData("parts", "missing-closing-param-quote.raw") p, err := enmime.ReadParts(r) diff --git a/testdata/parts/multimixed-no-closing-boundary.raw b/testdata/parts/multimixed-no-closing-boundary.raw new file mode 100644 index 00000000..27055dac --- /dev/null +++ b/testdata/parts/multimixed-no-closing-boundary.raw @@ -0,0 +1,12 @@ +Content-Type: multipart/mixed; boundary="Enmime-Test-100" + +--Enmime-Test-100 +Content-Type: text/html; charset=UTF-8 +Content-Transfer-Encoding: quoted-printable + + + +

+=0DHello, +

+
From 4238e6fc1d49a7dcfb107c524ebae8d15b9cddb8 Mon Sep 17 00:00:00 2001 From: Neil Date: Sat, 23 Feb 2019 13:00:04 -0500 Subject: [PATCH 10/16] =?UTF-8?q?media=20parameter=20attribute=20value=20n?= =?UTF-8?q?eeds=20to=20escape=20quotes=20inside=20quoted=20=E2=80=A6=20(#1?= =?UTF-8?q?24)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * media parameter attribute value needs to escape quotes inside quoted value * media parameters not at the end of the parameter list will be terminated with a semicolon * allow semicolon in "rest" value when checking if final quote needs escaping --- header.go | 102 +++++++++++++++++++++++++++++++++++++++++++++---- header_test.go | 31 +++++++++++++++ part_test.go | 2 +- 3 files changed, 126 insertions(+), 9 deletions(-) diff --git a/header.go b/header.go index a4a946c8..f934ed7f 100644 --- a/header.go +++ b/header.go @@ -228,15 +228,16 @@ func ParseMediaType(ctype string) (mtype string, params map[string]string, inval mctype := fixMangledMediaType(ctype, ";") mtype, params, err = mime.ParseMediaType(mctype) if err != nil { - // Some badly formed media types forget to send ; between fields. - mctype := fixMangledMediaType(ctype, " ") - if strings.Contains(mctype, `name=""`) { - mctype = strings.Replace(mctype, `name=""`, `name=" "`, -1) - } - mtype, params, err = mime.ParseMediaType(mctype) + // If the media parameter has special characters, ensure that + // it is quoted and that any existing quotes are escaped. + mtype, params, err = mime.ParseMediaType(fixUnescapedQuotes(fixUnquotedSpecials(mctype))) if err != nil { - // If the media parameter has special characters, ensure that it is quoted. - mtype, params, err = mime.ParseMediaType(fixUnquotedSpecials(mctype)) + // Some badly formed media types forget to send ; between fields. + mctype := fixMangledMediaType(ctype, " ") + if strings.Contains(mctype, `name=""`) { + mctype = strings.Replace(mctype, `name=""`, `name=" "`, -1) + } + mtype, params, err = mime.ParseMediaType(mctype) if err != nil { return "", nil, nil, errors.WithStack(err) } @@ -513,6 +514,91 @@ func fixUnquotedSpecials(s string) string { return clean.String() } +// fixUnescapedQuotes inspects for unescaped quotes inside of a quoted string and escapes them +// +// Input: application/rtf; charset=iso-8859-1; name=""V047411.rtf".rtf" +// Output: application/rtf; charset=iso-8859-1; name="\"V047411.rtf\".rtf" +func fixUnescapedQuotes(hvalue string) string { + params := strings.SplitAfter(hvalue, ";") + sb := &strings.Builder{} + for i := 0; i < len(params); i++ { + // Inspect for "=" byte. + eq := strings.IndexByte(params[i], '=') + if eq < 0 { + // No "=", must be the content-type or a comment. + sb.WriteString(params[i]) + continue + } + sb.WriteString(params[i][:eq]) + param := params[i][eq:] + startingQuote := strings.IndexByte(param, '"') + closingQuote := strings.LastIndexByte(param, '"') + // Opportunity to exit early if there are no quotes. + if startingQuote < 0 && closingQuote < 0 { + // This value is not quoted, write the value and carry on. + sb.WriteString(param) + continue + } + // Check if only one quote was found in the string. + if closingQuote == startingQuote { + // Append the next chunk of params here in case of a semicolon mid string. + param = fmt.Sprintf("%s%s", param, params[i+1]) + closingQuote = strings.LastIndexByte(param, '"') + i++ + if closingQuote == startingQuote { + return sb.String() + } + } + // Write the k/v separator back in along with everything up until the first quote. + sb.WriteByte('=') + // Starting quote + sb.WriteByte('"') + sb.WriteString(param[1:startingQuote]) + // Get just the value, less the outer quotes. + rest := param[closingQuote+1:] + // If there is stuff after the last quote then we should escape the first quote. + if len(rest) > 0 && rest != ";" { + sb.WriteString("\\\"") + } + param = param[startingQuote+1 : closingQuote] + escaped := false + for strIdx := range param { + switch param[strIdx] { + case '"': + // We are inside of a quoted string, so lets escape this guy if it isn't already escaped. + if !escaped { + sb.WriteByte('\\') + escaped = false + } + sb.WriteByte(param[strIdx]) + case '\\': + // Something is getting escaped, a quote is the only char that needs + // this, so lets assume the following char is a double-quote. + escaped = true + sb.WriteByte('\\') + default: + escaped = false + sb.WriteByte(param[strIdx]) + } + } + // If there is stuff after the last quote then we should escape + // the last quote, apply the rest and terminate with a quote. + switch rest { + case ";": + sb.WriteByte('"') + sb.WriteString(rest) + case "": + sb.WriteByte('"') + default: + sb.WriteByte('\\') + sb.WriteByte('"') + sb.WriteString(rest) + sb.WriteByte('"') + } + } + return sb.String() +} + // Detects a RFC-822 linear-white-space, passed to strings.FieldsFunc. func whiteSpaceRune(r rune) bool { return r == ' ' || r == '\t' || r == '\r' || r == '\n' diff --git a/header_test.go b/header_test.go index d6abcd28..11c1e119 100644 --- a/header_test.go +++ b/header_test.go @@ -346,6 +346,37 @@ func TestFixUnquotedSpecials(t *testing.T) { } } +func TestFixUnEscapedQuotes(t *testing.T) { + testCases := []struct { + input, want string + }{ + { + input: "application/rtf; charset=iso-8859-1; name=\"\"V047411.rtf\".rtf\"", + want: "application/rtf; charset=iso-8859-1; name=\"\\\"V047411.rtf\\\".rtf\"", + }, + { + input: "application/rtf; charset=iso-8859-1; name=b\"V047411.rtf\".rtf", + want: "application/rtf; charset=iso-8859-1; name=\"b\\\"V047411.rtf\\\".rtf\"", + }, + { + input: "application/rtf; charset=iso-8859-1; name=\"V047411.rtf\".rtf", + want: "application/rtf; charset=iso-8859-1; name=\"\\\"V047411.rtf\\\".rtf\"", + }, + { + input: "application/rtf; charset=iso-8859-1; name=\"V047411.rtf;\".rtf", + want: "application/rtf; charset=iso-8859-1; name=\"\\\"V047411.rtf;\\\".rtf\"", + }, + } + for _, tc := range testCases { + t.Run(tc.input, func(t *testing.T) { + got := fixUnescapedQuotes(tc.input) + if got != tc.want { + t.Errorf("\ngot: %s\nwant: %s", got, tc.want) + } + }) + } +} + func TestReadHeader(t *testing.T) { prefix := "From: hooman\n \n being\n" suffix := "Subject: hi\n\nPart body\n" diff --git a/part_test.go b/part_test.go index 9e51a751..c9ce6780 100644 --- a/part_test.go +++ b/part_test.go @@ -863,7 +863,7 @@ func TestContentTypeParamMissingClosingQuote(t *testing.T) { wantp := &enmime.Part{ PartID: "0", ContentType: "text/html", - Charset: "UTF-8Return-Path:", + Charset: "UTF-8Return-Path: bounce-810_HTML-769869545-477063-1070564-43@bounce.email.oflce57578375.com", } test.ComparePart(t, p, wantp) From 38ce1aea6813a2f9a9d88085a43bd7dae58f6e39 Mon Sep 17 00:00:00 2001 From: Neil Date: Sat, 23 Feb 2019 13:25:22 -0500 Subject: [PATCH 11/16] =?UTF-8?q?return=20empty=20string=20and=20nil=20err?= =?UTF-8?q?or=20if=20content-type=20is=20empty=20so=20that=20en=E2=80=A6?= =?UTF-8?q?=20(#125)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * return empty string and nil error if content-type is empty so that enmime.ErrorMissingContentType can be added to the errors slice and continue parsing * return empty string and nil error on specific mime: no media type error Co-Authored-By: requaos --- header.go | 3 ++ part.go | 11 ++++---- part_test.go | 32 ++++++++++++++++++++++ testdata/parts/empty-ctype-bad-content.raw | 20 ++++++++++++++ 4 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 testdata/parts/empty-ctype-bad-content.raw diff --git a/header.go b/header.go index f934ed7f..c53c5bb0 100644 --- a/header.go +++ b/header.go @@ -224,6 +224,9 @@ func quotedDisplayName(s string) string { func ParseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) { mtype, params, err = mime.ParseMediaType(ctype) if err != nil { + if err.Error() == "mime: no media type" { + return "", nil, nil, nil + } // Small hack to remove harmless charset duplicate params. mctype := fixMangledMediaType(ctype, ";") mtype, params, err = mime.ParseMediaType(mctype) diff --git a/part.go b/part.go index 070e3789..b96604f3 100644 --- a/part.go +++ b/part.go @@ -296,13 +296,14 @@ func (p *Part) decodeContent(r io.Reader) error { // Collect base64 errors. if b64cleaner != nil { for _, err := range b64cleaner.Errors { - p.Errors = append(p.Errors, &Error{ - Name: ErrorMalformedBase64, - Detail: err.Error(), - Severe: false, - }) + p.addWarning(ErrorMalformedBase64, err.Error()) } } + // Set empty content-type error. + if p.ContentType == "" { + p.addWarning( + ErrorMissingContentType, "content-type is empty for part id: %s", p.PartID) + } return nil } diff --git a/part_test.go b/part_test.go index c9ce6780..872700d8 100644 --- a/part_test.go +++ b/part_test.go @@ -773,6 +773,38 @@ func TestBarrenContentType(t *testing.T) { } } +func TestEmptyContentTypeBadContent(t *testing.T) { + r := test.OpenTestData("parts", "empty-ctype-bad-content.raw") + p, err := enmime.ReadParts(r) + if err != nil { + t.Fatal(err) + } + + wantp := &enmime.Part{ + PartID: "1", + Parent: test.PartExists, + Disposition: "", + } + test.ComparePart(t, p.FirstChild, wantp) + + expected := enmime.ErrorMissingContentType + satisfied := false + for _, perr := range p.FirstChild.Errors { + if perr.Name == expected { + satisfied = true + if perr.Severe { + t.Errorf("Expected Severe to be false, got true for %q", perr.Name) + } + } + } + if !satisfied { + t.Errorf( + "Did not find expected error on part. Expected %q, but had: %v", + expected, + p.Errors) + } +} + func TestMalformedContentTypeParams(t *testing.T) { r := test.OpenTestData("parts", "malformed-content-type-params.raw") p, err := enmime.ReadParts(r) diff --git a/testdata/parts/empty-ctype-bad-content.raw b/testdata/parts/empty-ctype-bad-content.raw new file mode 100644 index 00000000..3841f51b --- /dev/null +++ b/testdata/parts/empty-ctype-bad-content.raw @@ -0,0 +1,20 @@ +Content-Type: multipart/alternative; boundary="----=_Part_10541_215446765.98298273965074084" + +------=_Part_10541_215446765.98298273965074084 +Content-Type: + +Use the View Invoices function or locate the account on the List of Accounts table.
+
+Please do not reply to this e-mail message.
+
+Your Team
+
+
+If you have received this notification in error, or if you need further assistance accessing your invoice, please contact us.
+

+ + +Content-Transfer-Encoding: quoted-printable + + +------=_Part_10541_215446765.98298273965074084-- From 547d3380be3571f6164b811bb11b6c9c8890a98e Mon Sep 17 00:00:00 2001 From: davrux Date: Mon, 27 May 2019 16:44:40 +0200 Subject: [PATCH 12/16] Propper detection of text/plain attachment. (#129) * Detect mail with text/plain attachment. A Mail with the following header will use the attachment as text body. Content-Type: text/plain; name="test.csv" Content-Disposition: attachment; filename="test.csv" Fixed detection of attachment only mail. --- detect.go | 8 +++++++- detect_test.go | 1 + testdata/mail/attachment-only-text-attachment.raw | 11 +++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 testdata/mail/attachment-only-text-attachment.raw diff --git a/detect.go b/detect.go index 8e027afa..82bc2203 100644 --- a/detect.go +++ b/detect.go @@ -63,7 +63,13 @@ func detectTextHeader(header textproto.MIMEHeader, emptyContentTypeIsText bool) // detectBinaryBody returns true if the mail header defines a binary body. func detectBinaryBody(root *Part) bool { if detectTextHeader(root.Header, true) { - return false + // It is text/plain, but an attachment. + // Content-Type: text/plain; name="test.csv" + // Content-Disposition: attachment; filename="test.csv" + // Check for attachment only, or inline body is marked + // as attachment, too. + mediatype, _, _, _ := ParseMediaType(root.Header.Get(hnContentDisposition)) + return strings.ToLower(mediatype) == cdAttachment } isBin := detectAttachmentHeader(root.Header) diff --git a/detect_test.go b/detect_test.go index 11f86f15..ff480905 100644 --- a/detect_test.go +++ b/detect_test.go @@ -51,6 +51,7 @@ func TestDetectBinaryBody(t *testing.T) { {filename: "attachment-only.raw", disposition: "attachment"}, {filename: "attachment-only-inline.raw", disposition: "inline"}, {filename: "attachment-only-no-disposition.raw", disposition: "none"}, + {filename: "attachment-only-text-attachment.raw", disposition: "attachment"}, } for _, tt := range ttable { r, _ := os.Open(filepath.Join("testdata", "mail", tt.filename)) diff --git a/testdata/mail/attachment-only-text-attachment.raw b/testdata/mail/attachment-only-text-attachment.raw new file mode 100644 index 00000000..32015c92 --- /dev/null +++ b/testdata/mail/attachment-only-text-attachment.raw @@ -0,0 +1,11 @@ +To: bob@test.com +From: alice@test.com +Subject: Test +Message-ID: <56A0AA5F.4020203@test.com> +Date: Thu, 21 Jan 2016 10:52:31 +0100 +MIME-Version: 1.0 +Content-Type: text/plain; name="test.csv" +Content-Disposition: attachment; filename="test.csv" +Content-Transfer-Encoding: base64 + +VGVzdDtUZXN0Cg== From ab433f362cbdf7ac65c4ddbfa7faf383d43eecf4 Mon Sep 17 00:00:00 2001 From: James Hillyerd Date: Sun, 31 Mar 2019 09:18:54 -0700 Subject: [PATCH 13/16] travis: Add Go 1.12.x to tests --- .travis.yml | 1 + go.sum | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index a335214e..c8e0f65d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,3 +14,4 @@ after_success: go: - "1.11.x" + - "1.12.x" diff --git a/go.sum b/go.sum index 8bd7c4ae..4613134c 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,8 @@ github.com/mattn/go-runewidth v0.0.3 h1:a+kO+98RDGEfo6asOGMmpodZq4FNtnGP54yps8Bz github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/olekukonko/tablewriter v0.0.0-20180912035003-be2c049b30cc h1:rQ1O4ZLYR2xXHXgBCCfIIGnuZ0lidMQw2S5n1oOv+Wg= github.com/olekukonko/tablewriter v0.0.0-20180912035003-be2c049b30cc/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo= From 031aa891ebf4a21b5f8662296499f3cd878d4997 Mon Sep 17 00:00:00 2001 From: Neil Date: Wed, 5 Jun 2019 12:54:42 -0400 Subject: [PATCH 14/16] #127 Refactor Read method on boundaryReader to avoid usage of UnreadByte() (#130) * #127 Refactor Read method on boundaryReader to avoid usage of UnreadByte() --- boundary.go | 54 ++++++++++++++++------------------------------------- 1 file changed, 16 insertions(+), 38 deletions(-) diff --git a/boundary.go b/boundary.go index 3df064e7..19da4d93 100644 --- a/boundary.go +++ b/boundary.go @@ -82,35 +82,32 @@ func (b *boundaryReader) Read(dest []byte) (n int, err error) { } // Ensure that we can switch on the first byte of 'cs' without panic. if len(cs) > 0 { + padding := 1 + check := false + switch cs[0] { + // Check for carriage return as potential CRLF boundary prefix. + case '\r': + padding = 2 + check = true // Check for line feed as potential LF boundary prefix. case '\n': - peek, err := b.r.Peek(len(b.nlPrefix) + 2) + check = true + } + + if check { + peek, err := b.r.Peek(len(b.nlPrefix) + padding + 1) switch err { case nil: // Check the whitespace at the head of the peek to avoid checking for a boundary early. if bytes.HasPrefix(peek, []byte("\n\n")) || - bytes.HasPrefix(peek, []byte("\n\r")) { + bytes.HasPrefix(peek, []byte("\n\r")) || + bytes.HasPrefix(peek, []byte("\r\n\r")) || + bytes.HasPrefix(peek, []byte("\r\n\n")) { break } // Check the peek buffer for a boundary delimiter or terminator. - if b.isDelimiter(peek[1:]) || b.isTerminator(peek[1:]) { - // Check if we stored a carriage return. - if b.crBoundaryPrefix { - b.crBoundaryPrefix = false - // Let us now unread that back onto the io.Reader, since - // we have found what we are looking for and this byte - // belongs to the bounded block we are reading. - err = b.r.UnreadByte() - switch err { - case nil: - // Carry on. - case bufio.ErrInvalidUnreadByte: - // Carriage return boundary prefix bit already unread. - default: - return 0, errors.WithStack(err) - } - } + if b.isDelimiter(peek[padding:]) || b.isTerminator(peek[padding:]) { // We have found our boundary terminator, lets write out the final bytes // and return io.EOF to indicate that this section read is complete. n, err = b.buffer.Read(dest) @@ -129,25 +126,6 @@ func (b *boundaryReader) Read(dest []byte) (n int, err error) { default: continue } - // Checked '\n' was not prefix to a boundary. - if b.crBoundaryPrefix { - b.crBoundaryPrefix = false - // Stored '\r' should be written to the buffer now. - err = b.buffer.WriteByte('\r') - if err != nil { - return 0, errors.WithStack(err) - } - } - // Check for carriage return as potential CRLF boundary prefix. - case '\r': - _, err := b.r.ReadByte() - if err != nil { - return 0, errors.WithStack(err) - } - // Flag the boundary reader to indicate that we - // have stored a '\r' as a potential CRLF prefix. - b.crBoundaryPrefix = true - continue } } From 1009b48fae0c2670f8c16cd9dd5612aea19e1316 Mon Sep 17 00:00:00 2001 From: James Hillyerd Date: Sat, 10 Aug 2019 16:46:06 -0700 Subject: [PATCH 15/16] Update changelog --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92a7d5a9..dd96cebb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ Change Log All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). +## [Unreleased] + +### Added +- Make ParseMediaType public. + +### Fixed +- Improve quoted display name handling (#112, thanks to requaos.) +- Refactor MIME part boundary detection (thanks to requaos.) +- Several improvements to MIME attribute decoding (thanks to requaos.) +- Detect text/plain attachments properly (thanks to davrux.) + + ## [0.5.0] - 2018-12-15 ### Added From f7175c471117c45bc8ddb6f72781901494cf3dd2 Mon Sep 17 00:00:00 2001 From: James Hillyerd Date: Sat, 10 Aug 2019 16:58:01 -0700 Subject: [PATCH 16/16] Add 0.6.0 to changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd96cebb..14fb0263 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ Change Log All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). -## [Unreleased] +## [0.6.0] - 2019-08-10 ### Added - Make ParseMediaType public. @@ -96,6 +96,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Initial implementation of MIME encoding, using `enmime.MailBuilder` [Unreleased]: https://github.com/jhillyerd/enmime/compare/master...develop +[0.6.0]: https://github.com/jhillyerd/enmime/compare/v0.5.0...v0.6.0 [0.5.0]: https://github.com/jhillyerd/enmime/compare/v0.4.0...v0.5.0 [0.4.0]: https://github.com/jhillyerd/enmime/compare/v0.3.0...v0.4.0 [0.3.0]: https://github.com/jhillyerd/enmime/compare/v0.2.1...v0.3.0