Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redeliver holding pen emails if there's a single guess #7790

Merged
merged 4 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 31 additions & 19 deletions app/mailers/request_mailer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -235,35 +235,47 @@ def requests_matching_email(email)
InfoRequest.matching_incoming_email(addresses)
end

def send_to_holding_pen(email, raw_email, opts)
opts[:rejected_reason] =
_("Could not identify the request from the email address")
request = InfoRequest.holding_pen_request
request.receive(email, raw_email, opts)
end

# Member function, called on the new class made in self.receive above
def receive(email, raw_email, source = :mailin)
opts = { source: source }

# Find which info requests the email is for
reply_info_requests = requests_matching_email(email)
# Only check mail that doesn't have spam in the header
return if SpamAddress.spam?(MailHandler.get_all_addresses(email))

# Nothing found OR multiple different info requests, so save in holding pen
if reply_info_requests.empty? || reply_info_requests.count > 1
opts[:rejected_reason] =
_("Could not identify the request from the email address")
request = InfoRequest.holding_pen_request
# Find exact matches for info requests
exact_info_requests = requests_matching_email(email)

unless SpamAddress.spam?(MailHandler.get_all_addresses(email))
request.receive(email, raw_email, opts)
end
return
# Find any guesses for info requests
unless exact_info_requests.count == 1
guessed_info_requests = Guess.guessed_info_requests(email)
end

# Send the message to each request, to be archived with it
reply_info_requests.each do |reply_info_request|
# If environment variable STOP_DUPLICATES is set, don't send message with same id again
if ENV['STOP_DUPLICATES']
if reply_info_request.already_received?(email, raw_email)
raise "message #{ email.message_id } already received by request"
end
# If there is only one info request matching mail, it gets attached to the
# request to be archived with it
if exact_info_requests.count == 1 || guessed_info_requests.count == 1
info_request = exact_info_requests.first || guessed_info_requests.first

if exact_info_requests.empty? && guessed_info_requests.count == 1
info_request.log_event(
'redeliver_incoming',
editor: 'automatic',
destination_request: info_request
)
end

reply_info_request.receive(email, raw_email, opts)
info_request.receive(email, raw_email, opts)

else
# Otherwise, if there are no matching IRs, multiple IRs, or multiple IR
# guesses, we send the mail to the holding pen
send_to_holding_pen(email, raw_email, opts)
end
end

Expand Down
64 changes: 64 additions & 0 deletions app/models/guess.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
require 'text'

##
# A guess at which info request a incoming message should be associated to
#
class Guess
attr_reader :info_request, :components

# The percentage similarity the id or idhash much fulfil
THRESHOLD = 0.8

##
# Return InfoRequest which we guess should receive an incoming message based
# on a threshold.
#
def self.guessed_info_requests(email)
# Match the email address in the message without matching the hash
email_addresses = MailHandler.get_all_addresses(email)
guesses = InfoRequest.guess_by_incoming_email(email_addresses)

guesses_reaching_threshold = guesses.select do |ir_guess|
id_score = ir_guess.id_score
idhash_score = ir_guess.idhash_score

(id_score == 1 && idhash_score >= THRESHOLD) ||
(id_score >= THRESHOLD && idhash_score == 1)
end

guesses_reaching_threshold.map(&:info_request).uniq
end

def initialize(info_request, **components)
@info_request = info_request
@components = components
end

def [](key)
components[key]
end

def id_score
return 1 unless self[:id]
similarity(self[:id], info_request.id)
end

def idhash_score
return 1 unless self[:idhash]
similarity(self[:idhash], info_request.idhash)
end

def ==(other)
info_request == other.info_request && components == other.components
end

def match_method
components.keys.first
end

private

def similarity(a, b)
Text::WhiteSimilarity.similarity(a.to_s, b.to_s)
end
end
12 changes: 8 additions & 4 deletions app/models/info_request.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
require 'fileutils'

class InfoRequest < ApplicationRecord
Guess = Struct.new(:info_request, :matched_value, :match_method).freeze
OLD_AGE_IN_DAYS = 21.days

include Rails.application.routes.url_helpers
Expand Down Expand Up @@ -245,8 +244,13 @@ def self.guess_by_incoming_email(*emails)
guesses = emails.flatten.reduce([]) do |memo, email|
id, idhash = _extract_id_hash_from_email(email)
id, idhash = _guess_idhash_from_email(email) if idhash.nil? || id.nil?
memo << Guess.new(find_by_id(id), email, :id)
memo << Guess.new(find_by_idhash(idhash), email, :idhash)

memo << Guess.new(
find_by_id(id), email: email, id: id, idhash: idhash
)
memo << Guess.new(
find_by_idhash(idhash), email: email, id: id, idhash: idhash
)
end

# Unique Guesses where we've found an `InfoRequest`
Expand Down Expand Up @@ -315,7 +319,7 @@ def self.guess_by_incoming_subject(subject_line)
limit(25)

guesses = requests.each.reduce([]) do |memo, request|
memo << Guess.new(request, subject_line, :subject)
memo << Guess.new(request, subject: subject_line)
end

# Unique Guesses where we've found an `InfoRequest`
Expand Down
4 changes: 2 additions & 2 deletions app/views/admin_raw_email/_holding_pen.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@
<code><%= guess.match_method %></code>
<% if guess.match_method == :subject %>
because the incoming message has a subject of
<strong><%= guess.matched_value %></strong>
<strong><%= guess[:subject] %></strong>
<% else %>
because it has an incoming email address
of <strong><%= guess.info_request.incoming_email %></strong>
and this incoming message was sent to
<strong><%= guess.matched_value %></strong>.
<strong><%= guess[:email] %></strong>.
<% end %>

</p>
Expand Down
7 changes: 5 additions & 2 deletions spec/controllers/admin_raw_email_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@

it 'assigns guessed requests based on the hash' do
get :show, params: { id: incoming_message.raw_email.id }
guess = InfoRequest::Guess.new(info_request, invalid_to, :idhash)
idhash = InfoRequest.hash_from_id(info_request.id)
guess = Guess.new(
info_request, email: invalid_to, id: nil, idhash: idhash
)
expect(assigns[:guessed_info_requests]).to eq([guess])
end

Expand All @@ -72,7 +75,7 @@
FactoryBot.create(:incoming_message, subject: 'Basic Email').
info_request
get :show, params: { id: incoming_message.raw_email.id }
guess = InfoRequest::Guess.new(other_request, 'Basic Email', :subject)
guess = Guess.new(other_request, subject: 'Basic Email')
expect(assigns[:guessed_info_requests]).to include(guess)
end

Expand Down
20 changes: 0 additions & 20 deletions spec/integration/admin_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -127,26 +127,6 @@
expect(page).to have_content "Only the authority can reply to this request"
end
end

it "guesses a misdirected request" do
info_request = FactoryBot.create(:info_request,
allow_new_responses_from: 'authority_only',
handle_rejected_responses: 'holding_pen')
mail_to = "request-#{info_request.id}[email protected]"
receive_incoming_mail('incoming-request-plain.email', email_to: mail_to)
interesting_email = last_holding_pen_mail

# now we add another message to the queue, which we're not interested in
receive_incoming_mail('incoming-request-plain.email',
email_to: info_request.incoming_email,
email_from: "")
expect(holding_pen_messages.length).to eq(2)
using_session(@admin) do
visit admin_raw_email_path interesting_email
expect(page).to have_content "Could not identify the request"
expect(page).to have_content info_request.title
end
end
end

describe 'generating an upload url' do
Expand Down
19 changes: 19 additions & 0 deletions spec/mailers/request_mailer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,25 @@
deliveries.clear
end

it "should append it to the appropriate request if there is only one guess of information request" do
ir = info_requests(:fancy_dog_request)
expect(ir.incoming_messages.count).to eq(1) # in the fixture
receive_incoming_mail(
'incoming-request-plain.email',
email_to: "request-#{ir.id}-#{ir.idhash}a@localhost"
)
expect(ir.incoming_messages.count).to eq(2) # one more arrives
expect(ir.info_request_events[-1].incoming_message_id).not_to be_nil
expect(ir.info_request_events[-2].params[:editor]).to eq("automatic")

deliveries = ActionMailer::Base.deliveries
expect(deliveries.size).to eq(1)
mail = deliveries[0]
# to the user who sent fancy_dog_request
expect(mail.to).to eq(['bob@localhost'])
deliveries.clear
end

it "should store mail in holding pen and send to admin when the email is not to any information request" do
ir = info_requests(:fancy_dog_request)
expect(ir.incoming_messages.count).to eq(1)
Expand Down
95 changes: 95 additions & 0 deletions spec/models/guess_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
require 'spec_helper'

RSpec.describe Guess do
let(:info_request) do
FactoryBot.create(:info_request, id: 100, idhash: '4e637388')
end

describe '.guessed_info_requests' do
subject(:guesses) { described_class.guessed_info_requests(email) }

let(:email) do
mail = Mail.new
mail.to address
mail
end

let(:info_request) { FactoryBot.create(:info_request, id: 4566) }
let!(:other_info_request) { FactoryBot.create(:info_request) }

let(:id) { info_request.id }
let(:hash) { info_request.idhash }

context 'with email matching ID and ID hash' do
let(:address) { info_request.incoming_email }

it 'return matching InfoRequest' do
is_expected.to match_array([info_request])
end
end

context 'with email matching ID and almost ID hash' do
let(:address) { "request-#{id}-#{hash[0...-1]}}z@localhost" }

it 'return guessed InfoRequest' do
is_expected.to match_array([info_request])
end
end

context 'with email matching ID hash and almost ID' do
let(:address) { "request-#{id.to_s[0...-1]}-#{hash}@localhost" }

it 'return guessed InfoRequest' do
is_expected.to match_array([info_request])
end
end
end

describe 'with a subject line given' do
let(:guess) { described_class.new(info_request, subject: 'subject_line') }

it 'returns an id_score of 1' do
expect(guess.id_score).to eq(1)
end

it 'returns an idhash_score of 1' do
expect(guess.idhash_score).to eq(1)
end
end

describe 'with an id given' do
let(:guess_1) { described_class.new(info_request, id: 100) }
let(:guess_2) { described_class.new(info_request, id: 456) }
let(:guess_3) { described_class.new(info_request, id: 109) }

it 'returns an id_score of 1 when it is correct' do
expect(guess_1.id_score).to eq(1.0)
end

it 'returns an id_score of 0 when there is no similarity' do
expect(guess_2.id_score).to eq(0.0)
end

it 'returns a value between 0 and 1 when there is some similarity' do
expect(guess_3.id_score).to be_between(0, 1).exclusive
end
end

describe 'with an idhash given' do
let(:guess_1) { described_class.new(info_request, idhash: '4e637388') }
let(:guess_2) { described_class.new(info_request, idhash: '12345678') }
let(:guess_3) { described_class.new(info_request, idhash: '4e637399') }

it 'returns an idhash_score of 1 when it is correct' do
expect(guess_1.idhash_score).to eq(1.0)
end

it 'returns an idhash_score of 0 when there is no similarity' do
expect(guess_2.idhash_score).to eq(0.0)
end

it 'returns a value between 0 and 1 when there is some similarity' do
expect(guess_3.idhash_score).to be_between(0, 1).exclusive
end
end
end
Loading
Loading