From 6c455efd9b0715b7d9de15c47473135f2f9e7a86 Mon Sep 17 00:00:00 2001 From: Alexander Griffen Date: Fri, 14 Jul 2023 15:43:53 +0100 Subject: [PATCH] feat: guessed mail don't go to holding pen Now, the 'Guessing' mechanism will be used when a mail is received. If there is only one guess produced by this mechanism, then the mail will go straight to that guess's IR --- app/mailers/request_mailer.rb | 50 ++++++++++++++++++----------- app/models/guess.rb | 23 +++++++++++++ spec/integration/admin_spec.rb | 20 ------------ spec/mailers/request_mailer_spec.rb | 19 +++++++++++ spec/models/guess_spec.rb | 40 +++++++++++++++++++++++ 5 files changed, 113 insertions(+), 39 deletions(-) diff --git a/app/mailers/request_mailer.rb b/app/mailers/request_mailer.rb index fa42e853f9..47daf34ee9 100644 --- a/app/mailers/request_mailer.rb +++ b/app/mailers/request_mailer.rb @@ -235,35 +235,47 @@ def requests_matching_email(email) InfoRequest.matching_incoming_email(addresses) end + def send_to_holding_pen(email, raw_email, opts) + opts[:rejected_reason] = + _("Could not identify the request from the email address") + request = InfoRequest.holding_pen_request + request.receive(email, raw_email, opts) + end + # Member function, called on the new class made in self.receive above def receive(email, raw_email, source = :mailin) opts = { source: source } - # Find which info requests the email is for - reply_info_requests = requests_matching_email(email) + # Only check mail that doesn't have spam in the header + return if SpamAddress.spam?(MailHandler.get_all_addresses(email)) - # Nothing found OR multiple different info requests, so save in holding pen - if reply_info_requests.empty? || reply_info_requests.count > 1 - opts[:rejected_reason] = - _("Could not identify the request from the email address") - request = InfoRequest.holding_pen_request + # Find exact matches for info requests + exact_info_requests = requests_matching_email(email) - unless SpamAddress.spam?(MailHandler.get_all_addresses(email)) - request.receive(email, raw_email, opts) - end - return + # Find any guesses for info requests + unless exact_info_requests.count == 1 + guessed_info_requests = Guess.guessed_info_requests(email) end - # Send the message to each request, to be archived with it - reply_info_requests.each do |reply_info_request| - # If environment variable STOP_DUPLICATES is set, don't send message with same id again - if ENV['STOP_DUPLICATES'] - if reply_info_request.already_received?(email, raw_email) - raise "message #{ email.message_id } already received by request" - end + # If there is only one info request matching mail, it gets attached to the + # request to be archived with it + if exact_info_requests.count == 1 || guessed_info_requests.count == 1 + info_request = exact_info_requests.first || guessed_info_requests.first + + if exact_info_requests.empty? && guessed_info_requests.count == 1 + info_request.log_event( + 'redeliver_incoming', + editor: 'automatic', + destination_request: info_request + ) end - reply_info_request.receive(email, raw_email, opts) + info_request.receive(email, raw_email, opts) + + else + # Otherwise, if there are no matching IRs, multiple IRs, or multiple IR + # guesses, we send the mail to the holding pen + send_to_holding_pen(email, raw_email, opts) end end diff --git a/app/models/guess.rb b/app/models/guess.rb index 1220f789f6..f75eafbf70 100644 --- a/app/models/guess.rb +++ b/app/models/guess.rb @@ -6,6 +6,29 @@ class Guess attr_reader :info_request, :components + # The percentage similarity the id or idhash much fulfil + THRESHOLD = 0.8 + + ## + # Return InfoRequest which we guess should receive an incoming message based + # on a threshold. + # + def self.guessed_info_requests(email) + # Match the email address in the message without matching the hash + email_addresses = MailHandler.get_all_addresses(email) + guesses = InfoRequest.guess_by_incoming_email(email_addresses) + + guesses_reaching_threshold = guesses.select do |ir_guess| + id_score = ir_guess.id_score + idhash_score = ir_guess.idhash_score + + (id_score == 1 && idhash_score >= THRESHOLD) || + (id_score >= THRESHOLD && idhash_score == 1) + end + + guesses_reaching_threshold.map(&:info_request).uniq + end + def initialize(info_request, **components) @info_request = info_request @components = components diff --git a/spec/integration/admin_spec.rb b/spec/integration/admin_spec.rb index e298236a4a..edfc4c0a96 100644 --- a/spec/integration/admin_spec.rb +++ b/spec/integration/admin_spec.rb @@ -127,26 +127,6 @@ expect(page).to have_content "Only the authority can reply to this request" end end - - it "guesses a misdirected request" do - info_request = FactoryBot.create(:info_request, - allow_new_responses_from: 'authority_only', - handle_rejected_responses: 'holding_pen') - mail_to = "request-#{info_request.id}-asdfg@example.com" - receive_incoming_mail('incoming-request-plain.email', email_to: mail_to) - interesting_email = last_holding_pen_mail - - # now we add another message to the queue, which we're not interested in - receive_incoming_mail('incoming-request-plain.email', - email_to: info_request.incoming_email, - email_from: "") - expect(holding_pen_messages.length).to eq(2) - using_session(@admin) do - visit admin_raw_email_path interesting_email - expect(page).to have_content "Could not identify the request" - expect(page).to have_content info_request.title - end - end end describe 'generating an upload url' do diff --git a/spec/mailers/request_mailer_spec.rb b/spec/mailers/request_mailer_spec.rb index b2563bb73c..3cd72d6b3d 100644 --- a/spec/mailers/request_mailer_spec.rb +++ b/spec/mailers/request_mailer_spec.rb @@ -24,6 +24,25 @@ deliveries.clear end + it "should append it to the appropriate request if there is only one guess of information request" do + ir = info_requests(:fancy_dog_request) + expect(ir.incoming_messages.count).to eq(1) # in the fixture + receive_incoming_mail( + 'incoming-request-plain.email', + email_to: "request-#{ir.id}-#{ir.idhash}a@localhost" + ) + expect(ir.incoming_messages.count).to eq(2) # one more arrives + expect(ir.info_request_events[-1].incoming_message_id).not_to be_nil + expect(ir.info_request_events[-2].params[:editor]).to eq("automatic") + + deliveries = ActionMailer::Base.deliveries + expect(deliveries.size).to eq(1) + mail = deliveries[0] + # to the user who sent fancy_dog_request + expect(mail.to).to eq(['bob@localhost']) + deliveries.clear + end + it "should store mail in holding pen and send to admin when the email is not to any information request" do ir = info_requests(:fancy_dog_request) expect(ir.incoming_messages.count).to eq(1) diff --git a/spec/models/guess_spec.rb b/spec/models/guess_spec.rb index 4ca55ccb27..d579bba0ae 100644 --- a/spec/models/guess_spec.rb +++ b/spec/models/guess_spec.rb @@ -5,6 +5,46 @@ FactoryBot.create(:info_request, id: 100, idhash: '4e637388') end + describe '.guessed_info_requests' do + subject(:guesses) { described_class.guessed_info_requests(email) } + + let(:email) do + mail = Mail.new + mail.to address + mail + end + + let(:info_request) { FactoryBot.create(:info_request, id: 4566) } + let!(:other_info_request) { FactoryBot.create(:info_request) } + + let(:id) { info_request.id } + let(:hash) { info_request.idhash } + + context 'with email matching ID and ID hash' do + let(:address) { info_request.incoming_email } + + it 'return matching InfoRequest' do + is_expected.to match_array([info_request]) + end + end + + context 'with email matching ID and almost ID hash' do + let(:address) { "request-#{id}-#{hash[0...-1]}}z@localhost" } + + it 'return guessed InfoRequest' do + is_expected.to match_array([info_request]) + end + end + + context 'with email matching ID hash and almost ID' do + let(:address) { "request-#{id.to_s[0...-1]}-#{hash}@localhost" } + + it 'return guessed InfoRequest' do + is_expected.to match_array([info_request]) + end + end + end + describe 'with a subject line given' do let(:guess) { described_class.new(info_request, subject: 'subject_line') }