From af8509e69509696547742f9a9a1e9a6a2db454fd Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Thu, 28 Sep 2023 17:05:22 +0100 Subject: [PATCH 1/6] Update attachment masking before action callbacks We're going to make a change which might mean attachments might be rebuilt within the `FoiAttachmentMaskJob` this means the signed global ID might not return the attachment in this controller. This change ensures their is a referer parameter and if the attachment can't be loaded then we redirect back to the referer. Note, we don't need to worry about people modifying the referer to get users redirected to a third party domain as this is protected against by Rails. --- .../attachment_masks_controller.rb | 11 ++++---- .../attachment_masks_controller_spec.rb | 28 ++++++++++++++++--- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/app/controllers/attachment_masks_controller.rb b/app/controllers/attachment_masks_controller.rb index 4ac0c7d487..e68f9def0c 100644 --- a/app/controllers/attachment_masks_controller.rb +++ b/app/controllers/attachment_masks_controller.rb @@ -5,7 +5,7 @@ class AttachmentMasksController < ApplicationController before_action :set_no_crawl_headers before_action :find_attachment - before_action :ensure_attachment, :ensure_referer + before_action :ensure_referer, :ensure_attachment def wait if @attachment.masked? @@ -38,13 +38,14 @@ def set_no_crawl_headers def find_attachment @attachment = GlobalID::Locator.locate_signed(params[:id]) - end - - def ensure_attachment - raise ActiveRecord::RecordNotFound unless @attachment + rescue ActiveRecord::RecordNotFound end def ensure_referer raise RouteNotFound unless params[:referer].present? end + + def ensure_attachment + redirect_to(params[:referer]) unless @attachment + end end diff --git a/spec/controllers/attachment_masks_controller_spec.rb b/spec/controllers/attachment_masks_controller_spec.rb index ab4c4bae51..11b636e6e3 100644 --- a/spec/controllers/attachment_masks_controller_spec.rb +++ b/spec/controllers/attachment_masks_controller_spec.rb @@ -44,11 +44,21 @@ def wait end end + context "when attachment can't be found" do + it 'redirects to referer' do + allow(GlobalID::Locator).to receive(:locate_signed).with('ABC'). + and_raise(ActiveRecord::RecordNotFound) + wait + expect(response).to redirect_to('/referer') + end + end + context 'without attachment' do let(:attachment) { nil } - it 'raises record not found error' do - expect { wait }.to raise_error(ActiveRecord::RecordNotFound) + it 'redirects to referer' do + wait + expect(response).to redirect_to('/referer') end end @@ -90,11 +100,21 @@ def done end end + context "when attachment can't be found" do + it 'redirects to referer' do + allow(GlobalID::Locator).to receive(:locate_signed).with('ABC'). + and_raise(ActiveRecord::RecordNotFound) + done + expect(response).to redirect_to('/referer') + end + end + context 'without attachment' do let(:attachment) { nil } - it 'raises record not found error' do - expect { done }.to raise_error(ActiveRecord::RecordNotFound) + it 'redirects to referer' do + done + expect(response).to redirect_to('/referer') end end From 5839ac1f7408a81e7fdd69519162cc758464e4f1 Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Mon, 2 Oct 2023 11:40:23 +0100 Subject: [PATCH 2/6] Remove attachment prominence check This check isn't doing anything. It was meant to protect when parsing incoming messages to stop hidden attachments from being made public again. We don't actually yet have this parsing in place instead we've focused on finding the original attachment [1]. For the remainder of cases we will need to parse the raw email again but we have added separate check [2] so this one here is redundant. [1] https://github.com/mysociety/alaveteli/issues/7875 [2] https://github.com/mysociety/alaveteli/pull/7920 --- app/models/foi_attachment.rb | 4 ---- spec/models/foi_attachment_spec.rb | 13 ------------- 2 files changed, 17 deletions(-) diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index 104f21c5d3..5bc1905e58 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -133,10 +133,6 @@ def unmasked_body hexdigest: hexdigest ) rescue MailHandler::MismatchedAttachmentHexdigest - unless is_public? - raise(MissingAttachment, "prominence not public (ID=#{id})") - end - unless file.attached? raise(MissingAttachment, "file not attached (ID=#{id})") end diff --git a/spec/models/foi_attachment_spec.rb b/spec/models/foi_attachment_spec.rb index 02920bcb16..107395d56d 100644 --- a/spec/models/foi_attachment_spec.rb +++ b/spec/models/foi_attachment_spec.rb @@ -218,19 +218,6 @@ and_raise(MailHandler::MismatchedAttachmentHexdigest) end - context 'when attachment has prominence' do - let(:foi_attachment) do - FactoryBot.create(:body_text, prominence: 'hidden') - end - - it 'raises missing attachment exception' do - expect { unmasked_body }.to raise_error( - FoiAttachment::MissingAttachment, - "prominence not public (ID=#{foi_attachment.id})" - ) - end - end - context 'when attachment file is unattached' do let(:foi_attachment) do FactoryBot.create(:body_text) From 5d5f2621fbd73e2913f9fce92732f30e20fef764 Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Wed, 27 Sep 2023 22:46:20 +0100 Subject: [PATCH 3/6] Update FoiAttachmentMaskJob Catch and rescue from `MissingAttachment` exceptions. Re-parse the incoming message to rebuild the attachments instead. Fixes: #7875 --- app/jobs/foi_attachment_mask_job.rb | 3 + app/models/foi_attachment.rb | 8 +-- spec/jobs/foi_attachment_mask_job_spec.rb | 8 +++ spec/models/foi_attachment_spec.rb | 68 +++++++++-------------- 4 files changed, 39 insertions(+), 48 deletions(-) diff --git a/app/jobs/foi_attachment_mask_job.rb b/app/jobs/foi_attachment_mask_job.rb index f9939adab3..86845c3803 100644 --- a/app/jobs/foi_attachment_mask_job.rb +++ b/app/jobs/foi_attachment_mask_job.rb @@ -31,6 +31,9 @@ def perform(attachment) end attachment.update(body: body, masked_at: Time.zone.now) + + rescue FoiAttachment::MissingAttachment + incoming_message.parse_raw_email!(true) end private diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index 5bc1905e58..be107cabcd 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -133,17 +133,13 @@ def unmasked_body hexdigest: hexdigest ) rescue MailHandler::MismatchedAttachmentHexdigest - unless file.attached? - raise(MissingAttachment, "file not attached (ID=#{id})") - end - attributes = MailHandler.attempt_to_find_original_attachment_attributes( raw_email.mail, body: file.download - ) + ) if file.attached? unless attributes - raise(MissingAttachment, "unable to find original (ID=#{id})") + raise MissingAttachment, "attachment missing in raw email (ID=#{id})" end update(hexdigest: attributes[:hexdigest]) diff --git a/spec/jobs/foi_attachment_mask_job_spec.rb b/spec/jobs/foi_attachment_mask_job_spec.rb index 8738054286..771e892b52 100644 --- a/spec/jobs/foi_attachment_mask_job_spec.rb +++ b/spec/jobs/foi_attachment_mask_job_spec.rb @@ -90,4 +90,12 @@ def perform expect(attachment.body).to_not include 'dull' expect(attachment.body).to include 'Horse' end + + it 'reparses raw email after rescuing FoiAttachment::MissingAttachment' do + allow(attachment).to receive(:unmasked_body).and_raise( + FoiAttachment::MissingAttachment + ) + expect(incoming_message).to receive(:parse_raw_email!).with(true) + perform + end end diff --git a/spec/models/foi_attachment_spec.rb b/spec/models/foi_attachment_spec.rb index 107395d56d..b3508966a3 100644 --- a/spec/models/foi_attachment_spec.rb +++ b/spec/models/foi_attachment_spec.rb @@ -198,7 +198,7 @@ before do allow(foi_attachment).to receive(:raw_email). - and_return(double.as_null_object) + and_return(double(mail: Mail.new)) end context 'when mail handler finds original attachment by hexdigest' do @@ -212,57 +212,41 @@ end end - context 'when mail handler can not original attachment by hexdigest' do + context 'when able to find original attachment through other means' do before do allow(MailHandler).to receive(:attachment_body_for_hexdigest). and_raise(MailHandler::MismatchedAttachmentHexdigest) - end - context 'when attachment file is unattached' do - let(:foi_attachment) do - FactoryBot.create(:body_text) - end + allow(MailHandler).to receive( + :attempt_to_find_original_attachment_attributes + ).and_return(hexdigest: 'ABC', body: 'hereistheunmaskedtext') + end - it 'raises missing attachment exception' do - foi_attachment.file.purge + it 'updates the hexdigest' do + expect { unmasked_body }.to change { foi_attachment.hexdigest }. + to('ABC') + end - expect { unmasked_body }.to raise_error( - FoiAttachment::MissingAttachment, - "file not attached (ID=#{foi_attachment.id})" - ) - end + it 'returns the attachment body from the raw email' do + is_expected.to eq('hereistheunmaskedtext') end + end - context 'when unable to find original attachment through other means' do - before do - allow(MailHandler).to receive( - :attempt_to_find_original_attachment_attributes - ).and_return(nil) - end - - it 'raises missing attachment exception' do - expect { unmasked_body }.to raise_error( - FoiAttachment::MissingAttachment, - "unable to find original (ID=#{foi_attachment.id})" - ) - end + context 'when unable to find original attachment through other means' do + before do + allow(MailHandler).to receive(:attachment_body_for_hexdigest). + and_raise(MailHandler::MismatchedAttachmentHexdigest) + + allow(MailHandler).to receive( + :attempt_to_find_original_attachment_attributes + ).and_return(nil) end - context 'when able to find original attachment through other means' do - before do - allow(MailHandler).to receive( - :attempt_to_find_original_attachment_attributes - ).and_return(hexdigest: 'ABC', body: 'hereistheunmaskedtext') - end - - it 'updates the hexdigest' do - expect { unmasked_body }.to change { foi_attachment.hexdigest }. - to('ABC') - end - - it 'returns the attachment body from the raw email' do - is_expected.to eq('hereistheunmaskedtext') - end + it 'raises missing attachment exception' do + expect { unmasked_body }.to raise_error( + FoiAttachment::MissingAttachment, + "attachment missing in raw email (ID=#{foi_attachment.id})" + ) end end From f3a035ec52e163252d561bf38226e38d54c7da9d Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Thu, 28 Sep 2023 17:04:39 +0100 Subject: [PATCH 4/6] Update FoiAttachment#body When attachment is unmasked we attempt to run the mask job inline. This commit makes a couple of changes. 1. It unlocks the job so we can run it, so we don't have to worry about if the job doesn't run 2. Now the mask job can call `IncomingMessage#parse_raw_email!(true)` we need to handle to case where the attachment disappears after being rebuilt. This results in a `ActiveRecord::RecordNotFound` when reloading the instance. 3. Add RebuiltAttachment exception which we can then catch and rescue later. See the next commit. --- app/models/foi_attachment.rb | 12 +++++++---- spec/models/foi_attachment_spec.rb | 32 ++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index be107cabcd..d3c9f2b12b 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -34,6 +34,7 @@ class FoiAttachment < ApplicationRecord include MessageProminence MissingAttachment = Class.new(StandardError) + RebuiltAttachment = Class.new(StandardError) belongs_to :incoming_message, inverse_of: :foi_attachments @@ -107,11 +108,14 @@ def body if masked? @cached_body = file.download else - job = FoiAttachmentMaskJob.perform_now(self) - return body if job - - raise MissingAttachment, "job already queued (ID=#{id})" + FoiAttachmentMaskJob.unlock!(self) + FoiAttachmentMaskJob.perform_now(self) + reload + body end + + rescue ActiveRecord::RecordNotFound + raise RebuiltAttachment, "attachment no longer present in DB (ID=#{id})" end # body as UTF-8 text, with scrubbing of invalid chars if needed diff --git a/spec/models/foi_attachment_spec.rb b/spec/models/foi_attachment_spec.rb index b3508966a3..802cd409e6 100644 --- a/spec/models/foi_attachment_spec.rb +++ b/spec/models/foi_attachment_spec.rb @@ -114,8 +114,13 @@ end end - context 'when unmasked and mask job is not queued' do - let(:foi_attachment) { FactoryBot.create(:body_text, :unmasked) } + context 'when unmasked and original attachment can be found' do + let(:incoming_message) do + FactoryBot.create(:incoming_message, foi_attachments_factories: [ + [:body_text, :unmasked] + ]) + end + let(:foi_attachment) { incoming_message.foi_attachments.last } it 'calls the FoiAttachmentMaskJob now and return the masked body' do expect(FoiAttachmentMaskJob).to receive(:perform_now). @@ -129,17 +134,28 @@ end end - context 'when unmasked and mask job is already queued' do - let(:foi_attachment) { FactoryBot.create(:body_text, :unmasked) } + context 'when unmasked and original attachment can not be found' do + let(:incoming_message) do + FactoryBot.create(:incoming_message, foi_attachments_factories: [ + [:body_text, :unmasked] + ]) + end + let(:foi_attachment) { incoming_message.foi_attachments.last } before do - allow(FoiAttachmentMaskJob).to receive(:perform_now).and_return(false) + foi_attachment.update(hexdigest: '123') + + expect(FoiAttachmentMaskJob).to receive(:perform_now). + with(foi_attachment). + and_invoke(-> (_) { + # mock the job + incoming_message.parse_raw_email!(true) + }) end - it 'raises missing attachment exception' do + it 'raises rebuilt attachment exception' do expect { foi_attachment.body }.to raise_error( - FoiAttachment::MissingAttachment, - "job already queued (ID=#{foi_attachment.id})" + FoiAttachment::RebuiltAttachment ) end end From b3b9b486f628087683443af932745c4c3c2b435b Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Thu, 28 Sep 2023 17:05:03 +0100 Subject: [PATCH 5/6] Add method to catch RebuiltAttachment exceptions When calling `FoiAttachment#body` this can now raise an exception due to the current attachment instance getting replaced and no longer existing in the database. We can catch and rescue from this by reloading the new attachment and calling the same code again. This is what `FoiAttachment.protect_against_rebuilt_attachments` does and should prevent exceptions when viewing requests which haven't be masked and cached yet. These will normally be very old attachments as this also need the `hexdigest` to be mismatched. --- app/models/foi_attachment.rb | 27 +++++++++++++++++++++++++++ app/models/incoming_message.rb | 10 +++++++--- spec/models/foi_attachment_spec.rb | 20 ++++++++++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index d3c9f2b12b..9145fa4647 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -82,6 +82,33 @@ class FoiAttachment < ApplicationRecord }.freeze # rubocop:enable Style/LineLength + # Helper method which can wrap calls to #body/#body_as_text/#default_body to + # ensure the `RebuiltAttachment` exception is caught. This is useful for when + # the body is required inline eg when the search index is being built or the + # text of the main attachment part is being cached in the database. + # + # Need to call this so the attachment is loaded within the block, EG, this + # would work: + # protect_against_rebuilt_attachments do + # incoming_message.foi_attachment.last.body + # end + # + # but this would fail: + # attachment = incoming_message.foi_attachment.last + # protect_against_rebuilt_attachments do + # attachment.body + # end + def self.protect_against_rebuilt_attachments(&block) + errored = false + begin + block.call if block_given? + rescue RebuiltAttachment => ex + raise ex if errored + errored = true + retry + end + end + def delete_cached_file! @cached_body = nil file.purge if file.attached? diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 41c8942328..bc36d57be9 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -345,8 +345,10 @@ def get_main_body_text_unfolded # Returns body text from main text part of email, converted to UTF-8 def get_main_body_text_internal parse_raw_email! - main_part = get_main_body_text_part - _convert_part_body_to_text(main_part) + FoiAttachment.protect_against_rebuilt_attachments do + main_part = get_main_body_text_part + _convert_part_body_to_text(main_part) + end end # Given a main text part, converts it to text @@ -544,7 +546,9 @@ def get_body_for_quoting # Returns text version of attachment text def get_attachment_text_full - text = _get_attachment_text_internal + text = FoiAttachment.protect_against_rebuilt_attachments do + _get_attachment_text_internal + end text = apply_masks(text, 'text/html') # This can be useful for memory debugging diff --git a/spec/models/foi_attachment_spec.rb b/spec/models/foi_attachment_spec.rb index 802cd409e6..b63eb3a6cc 100644 --- a/spec/models/foi_attachment_spec.rb +++ b/spec/models/foi_attachment_spec.rb @@ -158,6 +158,26 @@ FoiAttachment::RebuiltAttachment ) end + + context 'and called within protect_against_rebuilt_attachments block' do + def body + FoiAttachment.protect_against_rebuilt_attachments do + # Note, we're not using the `foi_attachment` "let" variable as the + # `FoiAttachment#body` code will call `parse_raw_email!(true)` and + # recreate the attachments, so the body returned, will belong to a + # new `FoiAttachment` instance + incoming_message.foi_attachments.last.body + end + end + + it 'does not raise rebuilt attachment exception' do + expect { body }.to_not raise_error + end + + it 'returns rebuilt body' do + expect(body).to eq('hereisthetext') + end + end end end From 625c6858da64ed42490def7f5e239437926f9cbc Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Mon, 2 Oct 2023 16:43:32 +0100 Subject: [PATCH 6/6] Improve exception message Exception notifications don't include the name of the exception so improve the message to make it clearer what the issue is. --- app/models/incoming_message.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index bc36d57be9..d973cc7b93 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -481,7 +481,8 @@ def extract_attachments if hidden_old_attachments.any? # if there are hidden attachments error as we don't want to re-build and # lose the prominence as this will make them public - raise UnableToExtractAttachments, "due to prominence of attachments " \ + raise UnableToExtractAttachments, "unable to extract attachments due " \ + "to prominence of attachments " \ "(ID=#{hidden_old_attachments.map(&:id).join(', ')})" else old_attachments.each(&:mark_for_destruction)