Skip to content

Commit

Permalink
Merge branch 'release/0.4.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
jhillyerd committed Nov 22, 2018
2 parents d07d9bb + dd9b622 commit e76110b
Show file tree
Hide file tree
Showing 23 changed files with 432 additions and 107 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ _testmain.go

*.exe

# goland ide
.idea

# vim swp files
*.swp

Expand Down
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@ Change Log
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).

## [0.4.0] - 2018-11-21

### Added
- Override declared character set if another is detected with high confidence
(thanks to nerdlich.)
- Handle unquoted specials in media type parameters (thanks to requaos.)
- Handle barren Content-Type headers (thanks to dcormier.)
- Better handle malformed media type parameters (thanks to dcormier.)

### Changed
- Use iso-8859-1 character map when implicitly declared (thanks to requaos.)
- Treat "inline" disposition as message content rather than as an attachment,
unless it is accompanied by parameters such as a filename (thanks to requaos.)

## [0.3.0] - 2018-11-01

### Added
Expand Down Expand Up @@ -55,6 +69,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
- Initial implementation of MIME encoding, using `enmime.MailBuilder`

[Unreleased]: https://github.com/jhillyerd/enmime/compare/master...develop
[0.4.0]: https://github.com/jhillyerd/enmime/compare/v0.3.0...v0.4.0
[0.3.0]: https://github.com/jhillyerd/enmime/compare/v0.2.1...v0.3.0
[0.2.1]: https://github.com/jhillyerd/enmime/compare/v0.2.0...v0.2.1
[0.2.0]: https://github.com/jhillyerd/enmime/compare/v0.1.0...v0.2.0
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ http://godoc.org/github.com/jhillyerd/enmime
Part readers: `Part.Read()` and `Part.Utf8Reader` have been removed. Please use
`Part.Content` instead.

A brief guide to migrating from the old 2016 go.enmime API is available here:
A brief guide to migrating from the old 2016 `go.enmime` API is available here:
https://github.com/jhillyerd/enmime/wiki/Enmime-Migration-Guide


Expand Down
12 changes: 6 additions & 6 deletions detect.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
func detectMultipartMessage(root *Part) bool {
// Parse top-level multipart
ctype := root.Header.Get(hnContentType)
mediatype, _, err := parseMediaType(ctype)
mediatype, _, _, err := parseMediaType(ctype)
if err != nil {
return false
}
Expand All @@ -29,13 +29,13 @@ func detectMultipartMessage(root *Part) bool {
// - Content-Disposition: inline; filename="frog.jpg"
// - Content-Type: attachment; filename="frog.jpg"
func detectAttachmentHeader(header textproto.MIMEHeader) bool {
mediatype, _, _ := parseMediaType(header.Get(hnContentDisposition))
mediatype, params, _, _ := parseMediaType(header.Get(hnContentDisposition))
if strings.ToLower(mediatype) == cdAttachment ||
strings.ToLower(mediatype) == cdInline {
(strings.ToLower(mediatype) == cdInline && len(params) > 0) {
return true
}

mediatype, _, _ = parseMediaType(header.Get(hnContentType))
mediatype, _, _, _ = parseMediaType(header.Get(hnContentType))
return strings.ToLower(mediatype) == cdAttachment
}

Expand All @@ -48,7 +48,7 @@ func detectTextHeader(header textproto.MIMEHeader, emptyContentTypeIsText bool)
return true
}

mediatype, _, err := parseMediaType(ctype)
mediatype, _, _, err := parseMediaType(ctype)
if err != nil {
return false
}
Expand All @@ -72,7 +72,7 @@ func detectBinaryBody(root *Part) bool {
// 'text/plain' or 'text/html'.
// Example:
// Content-Type: application/pdf; name="doc.pdf"
mediatype, _, _ := parseMediaType(root.Header.Get(hnContentType))
mediatype, _, _, _ := parseMediaType(root.Header.Get(hnContentType))
mediatype = strings.ToLower(mediatype)
if mediatype != ctTextPlain && mediatype != ctTextHTML {
return true
Expand Down
15 changes: 15 additions & 0 deletions detect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,21 @@ func TestDetectAttachmentHeader(t *testing.T) {
header: textproto.MIMEHeader{
"Content-Type": []string{"attachment; filename=\"test.jpg\""}},
},
{
want: false,
header: textproto.MIMEHeader{
"Content-Disposition": []string{"inline"}},
},
{
want: false,
header: textproto.MIMEHeader{
"Content-Disposition": []string{"inline; broken"}},
},
{
want: true,
header: textproto.MIMEHeader{
"Content-Disposition": []string{"attachment; broken"}},
},
{
want: true,
header: textproto.MIMEHeader{
Expand Down
13 changes: 2 additions & 11 deletions envelope.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ func parseTextOnlyBody(root *Part, e *Envelope) error {
var charset string
var isHTML bool
if ctype := root.Header.Get(hnContentType); ctype != "" {
if mediatype, mparams, err := parseMediaType(ctype); err == nil {
if mediatype, mparams, _, err := parseMediaType(ctype); err == nil {
isHTML = (mediatype == ctTextHTML)
if mparams[hpCharset] != "" {
charset = mparams[hpCharset]
Expand Down Expand Up @@ -254,15 +254,6 @@ func parseTextOnlyBody(root *Part, e *Envelope) error {
// Converted from charset in HTML
return nil
}

// Use charset found in header
if convHTML, err := coding.ConvertToUTF8String(charset, root.Content); err == nil {
// Successful conversion
e.HTML = convHTML
} else {
// Conversion failed
root.addWarning(ErrorCharsetConversion, err.Error())
}
} else {
e.Text = string(root.Content)
}
Expand All @@ -274,7 +265,7 @@ func parseTextOnlyBody(root *Part, e *Envelope) error {
func parseMultiPartBody(root *Part, e *Envelope) error {
// Parse top-level multipart
ctype := root.Header.Get(hnContentType)
mediatype, params, err := parseMediaType(ctype)
mediatype, params, _, err := parseMediaType(ctype)
if err != nil {
return fmt.Errorf("Unable to parse media type: %v", err)
}
Expand Down
13 changes: 13 additions & 0 deletions envelope_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,19 @@ func TestDuplicateParamsInMime(t *testing.T) {
}
}

// TestUnquotedSpecialCharParamsInMime checks that a message whose attachment
// filename parameter contains unquoted special characters (here parentheses)
// still parses, and that the filename is recovered intact.
func TestUnquotedSpecialCharParamsInMime(t *testing.T) {
	msg := test.OpenTestData("mail", "mime-unquoted-tspecials-param.raw")
	e, err := enmime.ReadEnvelope(msg)
	if err != nil {
		t.Fatal("Failed to parse MIME:", err)
	}

	// Guard the index below so a parsing regression reports a test failure
	// instead of panicking with an out-of-range error.
	if len(e.Attachments) == 0 {
		t.Fatal("Mail should have at least one attachment, got none")
	}

	want := "Invoice_(302232133150612).pdf"
	if got := e.Attachments[0].FileName; got != want {
		t.Fatalf("Attachment filename: got %q, want %q", got, want)
	}
}

func TestBadContentTypeInMime(t *testing.T) {
msg := test.OpenTestData("mail", "mime-bad-content-type.raw")
e, err := enmime.ReadEnvelope(msg)
Expand Down
2 changes: 2 additions & 0 deletions error.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ const (
ErrorContentEncoding = "Content Encoding"
// ErrorPlainTextFromHTML name.
ErrorPlainTextFromHTML = "Plain Text from HTML"
// ErrorCharsetDeclaration name.
ErrorCharsetDeclaration = "Character Set Declaration Mismatch"
)

// Error describes an error encountered while parsing.
Expand Down
1 change: 1 addition & 0 deletions error_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ func TestErrorEnvelopeWarnings(t *testing.T) {
{"unk-charset-html-only.raw", ErrorCharsetConversion},
{"unk-charset-part.raw", ErrorCharsetConversion},
{"malformed-base64-attach.raw", ErrorMalformedBase64},
{"incorrect-charset.raw", ErrorCharsetDeclaration},
}

for _, tt := range files {
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module github.com/jhillyerd/enmime

require (
github.com/go-test/deep v1.0.1
github.com/gogs/chardet v0.0.0-20150115103509-2404f7772561
github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0
github.com/mattn/go-runewidth v0.0.3 // indirect
github.com/olekukonko/tablewriter v0.0.0-20180912035003-be2c049b30cc // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
github.com/go-test/deep v1.0.1 h1:UQhStjbkDClarlmv0am7OXXO4/GaPdCGiUiMTvi28sg=
github.com/go-test/deep v1.0.1/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
github.com/gogs/chardet v0.0.0-20150115103509-2404f7772561 h1:aBzukfDxQlCTVS0NBUjI5YA3iVeaZ9Tb5PxNrrIP1xs=
github.com/gogs/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14=
github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0 h1:xqgexXAGQgY3HAjNPSaCqn5Aahbo5TKsmhp8VRfr1iQ=
github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
github.com/mattn/go-runewidth v0.0.3 h1:a+kO+98RDGEfo6asOGMmpodZq4FNtnGP54yps8BzLR4=
Expand Down
65 changes: 58 additions & 7 deletions header.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"mime"
"net/textproto"
Expand All @@ -26,6 +27,13 @@ const (
ctTextPlain = "text/plain"
ctTextHTML = "text/html"

// Used as a placeholder in case of malformed Content-Type headers
ctPlaceholder = "x-not-a-mime-type/x-not-a-mime-type"
// Used as a placeholder param value in case of malformed
// Content-Type/Content-Disposition parameters that lack values.
// E.g.: Content-Type: text/html;iso-8859-1
pvPlaceholder = "not-a-param-value"

// Standard Transfer encodings
cte7Bit = "7bit"
cte8Bit = "8bit"
Expand Down Expand Up @@ -187,7 +195,7 @@ func decodeToUTF8Base64Header(input string) string {
}

// parseMediaType is a more tolerant implementation of Go's mime.ParseMediaType function.
func parseMediaType(ctype string) (mtype string, params map[string]string, err error) {
func parseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
mtype, params, err = mime.ParseMediaType(ctype)
if err != nil {
// Small hack to remove harmless charset duplicate params.
Expand All @@ -201,11 +209,25 @@ func parseMediaType(ctype string) (mtype string, params map[string]string, err e
}
mtype, params, err = mime.ParseMediaType(mctype)
if err != nil {
return "", nil, err
// If the media parameter has special characters, ensure that it is quoted.
mtype, params, err = mime.ParseMediaType(fixUnquotedSpecials(mctype))
if err != nil {
return "", nil, nil, err
}
}
}
}
return mtype, params, err
if mtype == ctPlaceholder {
mtype = ""
}
for name, value := range params {
if value != pvPlaceholder {
continue
}
invalidParams = append(invalidParams, name)
delete(params, name)
}
return mtype, params, invalidParams, err
}

// fixMangledMediaType is used to insert ; separators into media type strings that lack them, and
Expand All @@ -216,20 +238,49 @@ func fixMangledMediaType(mtype, sep string) string {
}
parts := strings.Split(mtype, sep)
mtype = ""
for _, p := range parts {
if strings.Contains(p, "=") {
for i, p := range parts {
switch i {
case 0:
if p == "" {
// The content type is completely missing. Put in a placeholder.
p = ctPlaceholder
}
default:
if !strings.Contains(p, "=") {
p = p + "=" + pvPlaceholder
}
pair := strings.Split(p, "=")
if strings.Contains(mtype, pair[0]+"=") {
// Ignore repeated parameters.
continue
}
}
mtype += p + ";"
mtype += p
// Only terminate with semicolon if not the last parameter and if it doesn't already have a
// semicolon.
if i != len(parts)-1 && !strings.HasSuffix(mtype, ";") {
mtype += ";"
}
}
if strings.HasSuffix(mtype, ";") {
mtype = mtype[:len(mtype)-1]
}
return mtype
}

// Detects a RFC-822 linear-white-space, passed to strings.FieldsFunc
// fixUnquotedSpecials wraps unquoted "name=" parameter values (this also matches
// "filename=") in double quotes when they contain tspecials as listed in
// https://www.w3.org/Protocols/rfc1341/4_Content-Type.html, so that the result can
// be handed to mime.ParseMediaType.
//
// Each value is taken to end at the next ';' (or the end of the string), so
// parameters that follow the name are left untouched. Trailing blanks before the
// ';' are kept outside of the quotes. Values that already contain a double quote
// are copied verbatim: they are either quoted already or too malformed to repair
// safely here. Input without any "name=" parameter is returned unchanged.
func fixUnquotedSpecials(s string) string {
	const (
		key      = "name="
		specials = "()<>@,;:\\/[]?.="
	)

	var out strings.Builder
	rest := s
	for {
		idx := strings.Index(rest, key)
		if idx < 0 {
			out.WriteString(rest)
			return out.String()
		}
		// Copy everything up to and including the "name=" token.
		out.WriteString(rest[:idx+len(key)])
		rest = rest[idx+len(key):]

		// The raw value extends to the next parameter separator.
		end := strings.IndexByte(rest, ';')
		if end < 0 {
			end = len(rest)
		}
		raw := rest[:end]
		rest = rest[end:]

		value := strings.TrimRight(raw, " \t")
		if strings.ContainsAny(value, specials) && !strings.Contains(value, "\"") {
			// Quote only the value itself; re-emit any trailing blanks after it.
			fmt.Fprintf(&out, "\"%s\"%s", value, raw[len(value):])
			continue
		}
		out.WriteString(raw)
	}
}

// whiteSpaceRune reports whether r is an RFC-822 linear-white-space character
// (space, tab, carriage return or line feed). It is meant to be passed to
// strings.FieldsFunc.
func whiteSpaceRune(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	default:
		return false
	}
}
21 changes: 18 additions & 3 deletions header_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,17 +169,32 @@ func TestFixMangledMediaType(t *testing.T) {
{
input: "application/pdf name=\"file.pdf\"",
sep: " ",
want: "application/pdf;name=\"file.pdf\";",
want: "application/pdf;name=\"file.pdf\"",
},
{
input: "one/two; name=\"file.two\"; name=\"file.two\"",
sep: ";",
want: "one/two; name=\"file.two\";",
want: "one/two; name=\"file.two\"",
},
{
input: "one/two name=\"file.two\" name=\"file.two\"",
sep: " ",
want: "one/two;name=\"file.two\";",
want: "one/two;name=\"file.two\"",
},
{
input: "; name=\"file.two\"",
sep: ";",
want: ctPlaceholder + "; name=\"file.two\"",
},
{
input: "one/two;iso-8859-1",
sep: ";",
want: "one/two;iso-8859-1=" + pvPlaceholder,
},
{
input: "one/two; name=\"file.two\"; iso-8859-1",
sep: ";",
want: "one/two; name=\"file.two\"; iso-8859-1=" + pvPlaceholder,
},
}
for _, tc := range testCases {
Expand Down
12 changes: 6 additions & 6 deletions internal/coding/charsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,13 @@ var encodings = map[string]struct {
"cp819": {charmap.Windows1252, "windows-1252"},
"csisolatin1": {charmap.Windows1252, "windows-1252"},
"ibm819": {charmap.Windows1252, "windows-1252"},
"iso-8859-1": {charmap.Windows1252, "windows-1252"},
"iso-8859-1": {charmap.ISO8859_1, "iso-8859-1"},
"iso-ir-100": {charmap.Windows1252, "windows-1252"},
"iso8859-1": {charmap.Windows1252, "windows-1252"},
"iso8859_1": {charmap.Windows1252, "windows-1252"},
"iso88591": {charmap.Windows1252, "windows-1252"},
"iso_8859-1": {charmap.Windows1252, "windows-1252"},
"iso_8859-1:1987": {charmap.Windows1252, "windows-1252"},
"iso8859-1": {charmap.ISO8859_1, "iso-8859-1"},
"iso8859_1": {charmap.ISO8859_1, "iso-8859-1"},
"iso88591": {charmap.ISO8859_1, "iso-8859-1"},
"iso_8859-1": {charmap.ISO8859_1, "iso-8859-1"},
"iso_8859-1:1987": {charmap.ISO8859_1, "iso-8859-1"},
"l1": {charmap.Windows1252, "windows-1252"},
"latin1": {charmap.Windows1252, "windows-1252"},
"us-ascii": {charmap.Windows1252, "windows-1252"},
Expand Down
Loading

0 comments on commit e76110b

Please sign in to comment.