From d368e2dbb6408ba1328ad56ba6bb46682e7dabdd Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Tue, 29 Aug 2023 15:32:01 +0100 Subject: [PATCH] Fix embedded attachment processing For attachments which are embedded within an RFC822 attachment, we were unable to process some attachments if they were received a long time ago. This is down to changes in the underlying mail gem which result in slight differences in the headers which get appended to the attachment body. This change fixes this by only matching against the body of embedded attachments, ignoring headers and any slight changes. Fixes #7876 --- app/models/incoming_message.rb | 1 + lib/mail_handler/backends/mail_backend.rb | 31 ++++++++++++------- .../backends/mail_backend_spec.rb | 16 +++++----- spec/lib/mail_handler/mail_handler_spec.rb | 1 + 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index d15d8bb950..84be9aec3a 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -462,6 +462,7 @@ def extract_attachments _mail = raw_email.mail! 
attachment_attributes = MailHandler.get_attachment_attributes(_mail) attachment_attributes = attachment_attributes.inject({}) do |memo, attrs| + attrs.delete(:body_without_headers) memo[attrs[:hexdigest]] = attrs memo end diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index f5904f835f..e26a20bfde 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -369,16 +369,19 @@ def get_attachment_attributes(mail) if leaf.within_rfc822_attachment within_rfc822_subject = get_within_rfc822_subject(leaf) - body = extract_attached_message_headers(leaf) + body_with_header = extract_attached_message_headers(leaf) end - leaf_attributes = { url_part_number: leaf.url_part_number, - content_type: get_content_type(leaf), - filename: get_part_file_name(leaf), - charset: leaf.charset, - within_rfc822_subject: within_rfc822_subject, - body: body, - hexdigest: Digest::MD5.hexdigest(body) } + { + url_part_number: leaf.url_part_number, + content_type: get_content_type(leaf), + filename: get_part_file_name(leaf), + charset: leaf.charset, + within_rfc822_subject: within_rfc822_subject, + body_without_headers: body, + body: body_with_header || body, + hexdigest: Digest::MD5.hexdigest(body_with_header || body) + } end end @@ -397,21 +400,25 @@ def attempt_to_find_original_attachment_attributes(mail, body:, nested: false) all_attributes = get_attachment_attributes(mail) attributes = all_attributes.find do |attrs| - # ensure both bodies have the same line endings + # ensure bodies have the same line endings hexdigest_1 = Digest::MD5.hexdigest( Mail::Utilities.to_crlf(attrs[:body]) ) hexdigest_2 = Digest::MD5.hexdigest( + Mail::Utilities.to_crlf(attrs[:body_without_headers]) + ) + hexdigest_3 = Digest::MD5.hexdigest( Mail::Utilities.to_crlf(body) ) - hexdigest_1 == hexdigest_2 + hexdigest_1 == hexdigest_3 || hexdigest_2 == hexdigest_3 end return attributes if nested + mail_body = 
Mail.new(body).body attributes ||= attempt_to_find_original_attachment_attributes( - mail, body: Mail.new(body).to_s, nested: true - ) + mail, body: mail_body, nested: true + ) unless mail_body.empty? attributes end diff --git a/spec/lib/mail_handler/backends/mail_backend_spec.rb b/spec/lib/mail_handler/backends/mail_backend_spec.rb index 7a5143249b..9b888717bd 100644 --- a/spec/lib/mail_handler/backends/mail_backend_spec.rb +++ b/spec/lib/mail_handler/backends/mail_backend_spec.rb @@ -437,15 +437,16 @@ let(:mail) do mail_attachment = Mail.new( <<~EML - Date: Tue, 08 Aug 2023 10:00:00 +0000 - Message-ID: <64d611ca31906_ccf71e5039542@localhost> + Subject: Attached email + + Hello world EML ).to_s Mail.new do add_file filename: 'crlf.txt', content: "foo\r\nfoo" add_file filename: 'lf.txt', content: "bar\nbar" - add_file filename: 'mail.txt', content: mail_attachment + add_file filename: 'mail.eml', content: mail_attachment end end @@ -465,14 +466,15 @@ it { is_expected.to include(body: "bar\nbar") } end - context 'when body missing leading zero on dates' do + context 'when attached email headers are different' do let(:body) do <<~EML - Date: Tue, 8 Aug 2023 10:00:00 +0000 - Message-ID: <64d611ca31906_ccf71e5039542@localhost> + Subject: A different subject + + Hello world EML end - it { expect(attributes[:body]).to include('08 Aug 2023') } + it { is_expected.to include(body_without_headers: "Hello world\n") } end context 'when body does not match' do diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index b3c43e371c..a07f6774a6 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -545,6 +545,7 @@ def expect_header_string(fixture_file, header, header_string) attributes.each_with_index do |attr, index| attr.delete(:charset) attr.delete(:body) + attr.delete(:body_without_headers) attr.delete(:hexdigest) expect(attr).to eq(expected_attributes[index]) end