Skip to content

Commit

Permalink
Rework uuencoded attachment decoding
Browse files Browse the repository at this point in the history
Move all processing into our `MailHandler` library which allows us to
simplify the `IncomingMessage` model and attachment parsing.
  • Loading branch information
gbp committed Sep 11, 2023
1 parent 0fb77c8 commit 905d262
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 34 deletions.
23 changes: 1 addition & 22 deletions app/models/incoming_message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,6 @@ def clear_in_database_caches!
# search results
def _cache_main_body_text
text = get_main_body_text_internal
# Strip the uudecode parts from main text
# - this also effectively does a .dup as well, so text mods don't alter original
text = text.split(/^begin.+^`\n^end\n/m).join(" ")

if text.size > 1_000_000 # 1 MB ish
raise "main body text more than 1 MB, need to implement clipping like for attachment text, or there is some other MIME decoding problem or similar"
Expand All @@ -328,7 +325,7 @@ def _cache_main_body_text
self.cached_main_body_text_folded = folded_quoted_text.delete("\0")
save!
end
# Returns body text from main text part of email, converted to UTF-8, with uudecode removed,
# Returns body text from main text part of email, converted to UTF-8,
# emails and privacy sensitive things removed, censored, and folded to remove excess quoted text
# (marked with FOLDED_QUOTED_SECTION)
# TODO: returns a .dup of the text, so calling functions can in place modify it
Expand Down Expand Up @@ -414,16 +411,6 @@ def get_main_body_text_part(leaves=[])
p
end

# Builds one attachment record for each uuencoded file that
# MailHandler.uudecode extracts from the given main body text.
#
# text              - body text handed to MailHandler.uudecode
# start_part_number - part number handed to MailHandler.uudecode
#
# Returns an Array of attachment records, in the order the decoder
# produced them. Records are looked up (or initialised) by hexdigest.
def _uudecode_attachments(text, start_part_number)
  decoded_parts = MailHandler.uudecode(text, start_part_number)

  decoded_parts.map do |part_attrs|
    # The digest is only the lookup key, so take it out of the hash before
    # the remaining entries are assigned as attributes.
    digest = part_attrs.delete(:hexdigest)

    foi_attachments.find_or_initialize_by(hexdigest: digest).tap do |record|
      record.attributes = part_attrs
    end
  end
end

def get_attachments_for_display
parse_raw_email!
# return what user would consider attachments, i.e. not the main body
Expand Down Expand Up @@ -462,14 +449,6 @@ def extract_attachments
# identical.
main_part = get_main_body_text_part(attachments)

# We don't use get_main_body_text_internal, as we want to avoid charset
# conversions, since _uudecode_attachments needs to deal with those.
# e.g. for https://secure.mysociety.org/admin/foi/request/show_raw_email/24550
if main_part
c = _mail.count_first_uudecode_count
attachments += _uudecode_attachments(main_part.body, c)
end

# Purge old attachments that have been rebuilt with a new hexdigest
(foi_attachments - attachments).each(&:mark_for_destruction)
end
Expand Down
22 changes: 10 additions & 12 deletions lib/mail_handler/backends/mail_backend.rb
Original file line number Diff line number Diff line change
Expand Up @@ -366,15 +366,21 @@ def extract_attached_message_headers(leaf)
def get_attachment_attributes(mail)
get_attachment_leaves(mail).inject([]) do |acc, leaf|
original_body = body = get_part_body(leaf)
content_type = get_content_type(leaf)

if leaf.within_rfc822_attachment
within_rfc822_subject = get_within_rfc822_subject(leaf)
body = extract_attached_message_headers(leaf)
end

if content_type == 'text/plain'
uudecoded_parts = uudecode(body, leaf.url_part_number)
body = body.gsub(/^begin.+^`\n^end\n/m, '')
end

acc.push(
url_part_number: leaf.url_part_number,
content_type: get_content_type(leaf),
content_type: content_type,
filename: get_part_file_name(leaf),
charset: leaf.charset,
within_rfc822_subject: within_rfc822_subject,
Expand All @@ -383,6 +389,8 @@ def get_attachment_attributes(mail)
hexdigest: Digest::MD5.hexdigest(body)
)

acc += uudecoded_parts if uudecoded_parts

acc
end
end
Expand Down Expand Up @@ -424,17 +432,6 @@ def caluclate_hexdigest(body)
mail, body: mail_body, nested: true
) unless mail_body.empty?

return attributes if attributes

# check uuencoded attachments which can be located in plain text
uuencoded_attributes = all_attributes.inject([]) do |acc, attrs|
next acc unless attrs[:content_type] == 'text/plain'
acc += uudecode(attrs[:body], attrs[:url_part_number])
end
attributes ||= uuencoded_attributes.find do |attributes|
attributes[:hexdigest] == hexdigest
end

attributes
end

Expand All @@ -457,6 +454,7 @@ def uudecode(text, start_part_number)

{
body: body,
original_body: body,
filename: filename,
content_type: content_type,
hexdigest: hexdigest,
Expand Down

0 comments on commit 905d262

Please sign in to comment.