Skip to content

Commit

Permalink
Merge branch '5990-single-guess-redeliver' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
gbp committed Sep 21, 2023
2 parents a96966a + 6c455ef commit b6c48e4
Show file tree
Hide file tree
Showing 9 changed files with 366 additions and 92 deletions.
50 changes: 31 additions & 19 deletions app/mailers/request_mailer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -235,35 +235,47 @@ def requests_matching_email(email)
InfoRequest.matching_incoming_email(addresses)
end

def send_to_holding_pen(email, raw_email, opts)
opts[:rejected_reason] =
_("Could not identify the request from the email address")
request = InfoRequest.holding_pen_request
request.receive(email, raw_email, opts)
end

# Member function, called on the new class made in self.receive above
def receive(email, raw_email, source = :mailin)
opts = { source: source }

# Find which info requests the email is for
reply_info_requests = requests_matching_email(email)
# Only check mail that doesn't have spam in the header
return if SpamAddress.spam?(MailHandler.get_all_addresses(email))

# Nothing found OR multiple different info requests, so save in holding pen
if reply_info_requests.empty? || reply_info_requests.count > 1
opts[:rejected_reason] =
_("Could not identify the request from the email address")
request = InfoRequest.holding_pen_request
# Find exact matches for info requests
exact_info_requests = requests_matching_email(email)

unless SpamAddress.spam?(MailHandler.get_all_addresses(email))
request.receive(email, raw_email, opts)
end
return
# Find any guesses for info requests
unless exact_info_requests.count == 1
guessed_info_requests = Guess.guessed_info_requests(email)
end

# Send the message to each request, to be archived with it
reply_info_requests.each do |reply_info_request|
# If environment variable STOP_DUPLICATES is set, don't send message with same id again
if ENV['STOP_DUPLICATES']
if reply_info_request.already_received?(email, raw_email)
raise "message #{ email.message_id } already received by request"
end
# If there is only one info request matching mail, it gets attached to the
# request to be archived with it
if exact_info_requests.count == 1 || guessed_info_requests.count == 1
info_request = exact_info_requests.first || guessed_info_requests.first

if exact_info_requests.empty? && guessed_info_requests.count == 1
info_request.log_event(
'redeliver_incoming',
editor: 'automatic',
destination_request: info_request
)
end

reply_info_request.receive(email, raw_email, opts)
info_request.receive(email, raw_email, opts)

else
# Otherwise, if there are no matching IRs, multiple IRs, or multiple IR
# guesses, we send the mail to the holding pen
send_to_holding_pen(email, raw_email, opts)
end
end

Expand Down
64 changes: 64 additions & 0 deletions app/models/guess.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
require 'text'

##
# A guess at which info request a incoming message should be associated to
#
class Guess
attr_reader :info_request, :components

# The percentage similarity the id or idhash much fulfil
THRESHOLD = 0.8

##
# Return InfoRequest which we guess should receive an incoming message based
# on a threshold.
#
def self.guessed_info_requests(email)
# Match the email address in the message without matching the hash
email_addresses = MailHandler.get_all_addresses(email)
guesses = InfoRequest.guess_by_incoming_email(email_addresses)

guesses_reaching_threshold = guesses.select do |ir_guess|
id_score = ir_guess.id_score
idhash_score = ir_guess.idhash_score

(id_score == 1 && idhash_score >= THRESHOLD) ||
(id_score >= THRESHOLD && idhash_score == 1)
end

guesses_reaching_threshold.map(&:info_request).uniq
end

def initialize(info_request, **components)
@info_request = info_request
@components = components
end

def [](key)
components[key]
end

def id_score
return 1 unless self[:id]
similarity(self[:id], info_request.id)
end

def idhash_score
return 1 unless self[:idhash]
similarity(self[:idhash], info_request.idhash)
end

def ==(other)
info_request == other.info_request && components == other.components
end

def match_method
components.keys.first
end

private

def similarity(a, b)
Text::WhiteSimilarity.similarity(a.to_s, b.to_s)
end
end
12 changes: 8 additions & 4 deletions app/models/info_request.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
require 'fileutils'

class InfoRequest < ApplicationRecord
Guess = Struct.new(:info_request, :matched_value, :match_method).freeze
OLD_AGE_IN_DAYS = 21.days

include Rails.application.routes.url_helpers
Expand Down Expand Up @@ -245,8 +244,13 @@ def self.guess_by_incoming_email(*emails)
guesses = emails.flatten.reduce([]) do |memo, email|
id, idhash = _extract_id_hash_from_email(email)
id, idhash = _guess_idhash_from_email(email) if idhash.nil? || id.nil?
memo << Guess.new(find_by_id(id), email, :id)
memo << Guess.new(find_by_idhash(idhash), email, :idhash)

memo << Guess.new(
find_by_id(id), email: email, id: id, idhash: idhash
)
memo << Guess.new(
find_by_idhash(idhash), email: email, id: id, idhash: idhash
)
end

# Unique Guesses where we've found an `InfoRequest`
Expand Down Expand Up @@ -315,7 +319,7 @@ def self.guess_by_incoming_subject(subject_line)
limit(25)

guesses = requests.each.reduce([]) do |memo, request|
memo << Guess.new(request, subject_line, :subject)
memo << Guess.new(request, subject: subject_line)
end

# Unique Guesses where we've found an `InfoRequest`
Expand Down
4 changes: 2 additions & 2 deletions app/views/admin_raw_email/_holding_pen.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@
<code><%= guess.match_method %></code>
<% if guess.match_method == :subject %>
because the incoming message has a subject of
<strong><%= guess.matched_value %></strong>
<strong><%= guess[:subject] %></strong>
<% else %>
because it has an incoming email address
of <strong><%= guess.info_request.incoming_email %></strong>
and this incoming message was sent to
<strong><%= guess.matched_value %></strong>.
<strong><%= guess[:email] %></strong>.
<% end %>

</p>
Expand Down
7 changes: 5 additions & 2 deletions spec/controllers/admin_raw_email_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@

it 'assigns guessed requests based on the hash' do
get :show, params: { id: incoming_message.raw_email.id }
guess = InfoRequest::Guess.new(info_request, invalid_to, :idhash)
idhash = InfoRequest.hash_from_id(info_request.id)
guess = Guess.new(
info_request, email: invalid_to, id: nil, idhash: idhash
)
expect(assigns[:guessed_info_requests]).to eq([guess])
end

Expand All @@ -72,7 +75,7 @@
FactoryBot.create(:incoming_message, subject: 'Basic Email').
info_request
get :show, params: { id: incoming_message.raw_email.id }
guess = InfoRequest::Guess.new(other_request, 'Basic Email', :subject)
guess = Guess.new(other_request, subject: 'Basic Email')
expect(assigns[:guessed_info_requests]).to include(guess)
end

Expand Down
20 changes: 0 additions & 20 deletions spec/integration/admin_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -127,26 +127,6 @@
expect(page).to have_content "Only the authority can reply to this request"
end
end

it "guesses a misdirected request" do
info_request = FactoryBot.create(:info_request,
allow_new_responses_from: 'authority_only',
handle_rejected_responses: 'holding_pen')
mail_to = "request-#{info_request.id}[email protected]"
receive_incoming_mail('incoming-request-plain.email', email_to: mail_to)
interesting_email = last_holding_pen_mail

# now we add another message to the queue, which we're not interested in
receive_incoming_mail('incoming-request-plain.email',
email_to: info_request.incoming_email,
email_from: "")
expect(holding_pen_messages.length).to eq(2)
using_session(@admin) do
visit admin_raw_email_path interesting_email
expect(page).to have_content "Could not identify the request"
expect(page).to have_content info_request.title
end
end
end

describe 'generating an upload url' do
Expand Down
19 changes: 19 additions & 0 deletions spec/mailers/request_mailer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,25 @@
deliveries.clear
end

it "should append it to the appropriate request if there is only one guess of information request" do
ir = info_requests(:fancy_dog_request)
expect(ir.incoming_messages.count).to eq(1) # in the fixture
receive_incoming_mail(
'incoming-request-plain.email',
email_to: "request-#{ir.id}-#{ir.idhash}a@localhost"
)
expect(ir.incoming_messages.count).to eq(2) # one more arrives
expect(ir.info_request_events[-1].incoming_message_id).not_to be_nil
expect(ir.info_request_events[-2].params[:editor]).to eq("automatic")

deliveries = ActionMailer::Base.deliveries
expect(deliveries.size).to eq(1)
mail = deliveries[0]
# to the user who sent fancy_dog_request
expect(mail.to).to eq(['bob@localhost'])
deliveries.clear
end

it "should store mail in holding pen and send to admin when the email is not to any information request" do
ir = info_requests(:fancy_dog_request)
expect(ir.incoming_messages.count).to eq(1)
Expand Down
95 changes: 95 additions & 0 deletions spec/models/guess_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
require 'spec_helper'

RSpec.describe Guess do
let(:info_request) do
FactoryBot.create(:info_request, id: 100, idhash: '4e637388')
end

describe '.guessed_info_requests' do
subject(:guesses) { described_class.guessed_info_requests(email) }

let(:email) do
mail = Mail.new
mail.to address
mail
end

let(:info_request) { FactoryBot.create(:info_request, id: 4566) }
let!(:other_info_request) { FactoryBot.create(:info_request) }

let(:id) { info_request.id }
let(:hash) { info_request.idhash }

context 'with email matching ID and ID hash' do
let(:address) { info_request.incoming_email }

it 'return matching InfoRequest' do
is_expected.to match_array([info_request])
end
end

context 'with email matching ID and almost ID hash' do
let(:address) { "request-#{id}-#{hash[0...-1]}}z@localhost" }

it 'return guessed InfoRequest' do
is_expected.to match_array([info_request])
end
end

context 'with email matching ID hash and almost ID' do
let(:address) { "request-#{id.to_s[0...-1]}-#{hash}@localhost" }

it 'return guessed InfoRequest' do
is_expected.to match_array([info_request])
end
end
end

describe 'with a subject line given' do
let(:guess) { described_class.new(info_request, subject: 'subject_line') }

it 'returns an id_score of 1' do
expect(guess.id_score).to eq(1)
end

it 'returns an idhash_score of 1' do
expect(guess.idhash_score).to eq(1)
end
end

describe 'with an id given' do
let(:guess_1) { described_class.new(info_request, id: 100) }
let(:guess_2) { described_class.new(info_request, id: 456) }
let(:guess_3) { described_class.new(info_request, id: 109) }

it 'returns an id_score of 1 when it is correct' do
expect(guess_1.id_score).to eq(1.0)
end

it 'returns an id_score of 0 when there is no similarity' do
expect(guess_2.id_score).to eq(0.0)
end

it 'returns a value between 0 and 1 when there is some similarity' do
expect(guess_3.id_score).to be_between(0, 1).exclusive
end
end

describe 'with an idhash given' do
let(:guess_1) { described_class.new(info_request, idhash: '4e637388') }
let(:guess_2) { described_class.new(info_request, idhash: '12345678') }
let(:guess_3) { described_class.new(info_request, idhash: '4e637399') }

it 'returns an idhash_score of 1 when it is correct' do
expect(guess_1.idhash_score).to eq(1.0)
end

it 'returns an idhash_score of 0 when there is no similarity' do
expect(guess_2.idhash_score).to eq(0.0)
end

it 'returns a value between 0 and 1 when there is some similarity' do
expect(guess_3.idhash_score).to be_between(0, 1).exclusive
end
end
end
Loading

0 comments on commit b6c48e4

Please sign in to comment.