From fae6def49df901a1297dde6a90b67954b2c1ced5 Mon Sep 17 00:00:00 2001
From: Graeme Porteous
Date: Mon, 7 Aug 2023 11:53:19 +0100
Subject: [PATCH 1/4] Extract Guess into its own class
---
app/models/guess.rb | 2 +
app/models/info_request.rb | 1 -
.../admin_raw_email_controller_spec.rb | 4 +-
spec/models/info_request_spec.rb | 75 ++++++-------------
4 files changed, 28 insertions(+), 54 deletions(-)
create mode 100644 app/models/guess.rb
diff --git a/app/models/guess.rb b/app/models/guess.rb
new file mode 100644
index 0000000000..17523695a6
--- /dev/null
+++ b/app/models/guess.rb
@@ -0,0 +1,2 @@
+class Guess < Struct.new(:info_request, :matched_value, :match_method)
+end
diff --git a/app/models/info_request.rb b/app/models/info_request.rb
index 2175f27dbe..9626c76c29 100644
--- a/app/models/info_request.rb
+++ b/app/models/info_request.rb
@@ -40,7 +40,6 @@
require 'fileutils'
class InfoRequest < ApplicationRecord
- Guess = Struct.new(:info_request, :matched_value, :match_method).freeze
OLD_AGE_IN_DAYS = 21.days
include Rails.application.routes.url_helpers
diff --git a/spec/controllers/admin_raw_email_controller_spec.rb b/spec/controllers/admin_raw_email_controller_spec.rb
index ef923f2f42..ec83eacd6b 100644
--- a/spec/controllers/admin_raw_email_controller_spec.rb
+++ b/spec/controllers/admin_raw_email_controller_spec.rb
@@ -63,7 +63,7 @@
it 'assigns guessed requests based on the hash' do
get :show, params: { id: incoming_message.raw_email.id }
- guess = InfoRequest::Guess.new(info_request, invalid_to, :idhash)
+ guess = Guess.new(info_request, invalid_to, :idhash)
expect(assigns[:guessed_info_requests]).to eq([guess])
end
@@ -72,7 +72,7 @@
FactoryBot.create(:incoming_message, subject: 'Basic Email').
info_request
get :show, params: { id: incoming_message.raw_email.id }
- guess = InfoRequest::Guess.new(other_request, 'Basic Email', :subject)
+ guess = Guess.new(other_request, 'Basic Email', :subject)
expect(assigns[:guessed_info_requests]).to include(guess)
end
diff --git a/spec/models/info_request_spec.rb b/spec/models/info_request_spec.rb
index b5ab265ac7..97d908e049 100644
--- a/spec/models/info_request_spec.rb
+++ b/spec/models/info_request_spec.rb
@@ -1910,7 +1910,7 @@
context 'email with an intact id and broken idhash' do
let(:email) { "request-#{ info_request.id }-asdfg@example.com" }
- let(:guess) { described_class::Guess.new(info_request, email, :id) }
+ let(:guess) { Guess.new(info_request, email, :id) }
it { is_expected.to include(guess) }
end
@@ -1919,7 +1919,7 @@
"request-#{ info_request.id }ab-#{ info_request.idhash }@example.com"
end
- let(:guess) { described_class::Guess.new(info_request, email, :idhash) }
+ let(:guess) { Guess.new(info_request, email, :idhash) }
it { is_expected.to include(guess) }
end
@@ -1928,13 +1928,13 @@
"request-a12x3b-#{ info_request.idhash }@example.com"
end
- let(:guess) { described_class::Guess.new(info_request, email, :idhash) }
+ let(:guess) { Guess.new(info_request, email, :idhash) }
it { is_expected.to include(guess) }
end
context 'upper case email with a broken id and otherwise intact idhash' do
let(:email) { "REQUEST-123a-#{ info_request.idhash.upcase }@example.com" }
- let(:guess) { described_class::Guess.new(info_request, email, :idhash) }
+ let(:guess) { Guess.new(info_request, email, :idhash) }
it { is_expected.to include(guess) }
end
@@ -1943,7 +1943,7 @@
"request#{ info_request.id }#{ info_request.idhash }@example.com"
end
- let(:guess) { described_class::Guess.new(info_request, email, :id) }
+ let(:guess) { Guess.new(info_request, email, :id) }
it { is_expected.to include(guess) }
end
@@ -1952,19 +1952,19 @@
"request-#{ info_request.id }#{ info_request.idhash }@example.com"
end
- let(:guess) { described_class::Guess.new(info_request, email, :id) }
+ let(:guess) { Guess.new(info_request, email, :id) }
it { is_expected.to include(guess) }
end
context 'email with a broken id and an intact idhash but missing punctuation' do
let(:email) { "request123ab#{ info_request.idhash }@example.com" }
- let(:guess) { described_class::Guess.new(info_request, email, :idhash) }
+ let(:guess) { Guess.new(info_request, email, :idhash) }
it { is_expected.to include(guess) }
end
context 'email with an intact id and a broken idhash but missing punctuation' do
let(:email) { "request#{ info_request.id }abcdefgh@example.com" }
- let(:guess) { described_class::Guess.new(info_request, email, :id) }
+ let(:guess) { Guess.new(info_request, email, :id) }
it { is_expected.to include(guess) }
end
@@ -1972,13 +1972,13 @@
before { InfoRequest.where(id: 1_231_014).destroy_all }
let!(:info_request) { FactoryBot.create(:info_request, id: 1_231_014) }
let(:email) { 'request-123loL4abcdefgh@example.com' }
- let(:guess) { described_class::Guess.new(info_request, email, :id) }
+ let(:guess) { Guess.new(info_request, email, :id) }
it { is_expected.to include(guess) }
end
context 'email with a broken id and an intact idhash but broken format' do
let(:email) { "reqeust=123ab#{ info_request.idhash }@example.com" }
- let(:guess) { described_class::Guess.new(info_request, email, :idhash) }
+ let(:guess) { Guess.new(info_request, email, :idhash) }
it { is_expected.to include(guess) }
end
@@ -2000,11 +2000,8 @@
"request-#{ info_request_1.id }-#{ info_request_2.idhash }@example.com"
end
- let(:guess_1) { described_class::Guess.new(info_request_1, email, :id) }
-
- let(:guess_2) do
- described_class::Guess.new(info_request_2, email, :idhash)
- end
+ let(:guess_1) { Guess.new(info_request_1, email, :id) }
+ let(:guess_2) { Guess.new(info_request_2, email, :idhash) }
it { is_expected.to match_array([guess_1, guess_2]) }
end
@@ -2013,7 +2010,7 @@
let(:email_1) { "request-123ab-#{ info_request.idhash }@example.com" }
let(:email_2) { "request-#{ info_request.id }-asdfg@example.com" }
let(:email) { [email_1, email_2] }
- let(:guess) { described_class::Guess.new(info_request, email_1, :idhash) }
+ let(:guess) { Guess.new(info_request, email_1, :idhash) }
it { is_expected.to match_array([guess]) }
end
@@ -2032,11 +2029,8 @@
let(:email) { [email_1, email_2] }
- let(:guess_1) { described_class::Guess.new(info_request_1, email_1, :id) }
-
- let(:guess_2) do
- described_class::Guess.new(info_request_2, email_1, :idhash)
- end
+ let(:guess_1) { Guess.new(info_request_1, email_1, :id) }
+ let(:guess_2) { Guess.new(info_request_2, email_1, :idhash) }
it { is_expected.to match_array([guess_1, guess_2]) }
end
@@ -2055,9 +2049,7 @@
context 'a direct reply to the original request email' do
let(:subject_line) { info_request.email_subject_followup }
- let(:guess) do
- described_class::Guess.new(info_request, subject_line, :subject)
- end
+ let(:guess) { Guess.new(info_request, subject_line, :subject) }
it { is_expected.to include(guess) }
end
@@ -2067,9 +2059,7 @@
info_request.email_subject_followup.gsub('Re: ', 'RE: ')
end
- let(:guess) do
- described_class::Guess.new(info_request, subject_line, :subject)
- end
+ let(:guess) { Guess.new(info_request, subject_line, :subject) }
it { is_expected.to include(guess) }
end
@@ -2087,13 +2077,8 @@
'Re: Freedom of Information request - How many jelly beans?'
end
- let(:guess_1) do
- described_class::Guess.new(info_request_1, subject_line, :subject)
- end
-
- let(:guess_2) do
- described_class::Guess.new(info_request_2, subject_line, :subject)
- end
+ let(:guess_1) { Guess.new(info_request_1, subject_line, :subject) }
+ let(:guess_2) { Guess.new(info_request_2, subject_line, :subject) }
it { is_expected.to match_array([guess_1, guess_2]) }
end
@@ -2114,10 +2099,7 @@
end
let(:subject_line) { 'Our ref: 12345678' }
-
- let(:guess) do
- described_class::Guess.new(info_request, subject_line, :subject)
- end
+ let(:guess) { Guess.new(info_request, subject_line, :subject) }
it { is_expected.to include(guess) }
end
@@ -2132,9 +2114,7 @@
end
let(:guess) do
- described_class::Guess.new(InfoRequest.holding_pen_request,
- subject_line,
- :subject)
+ Guess.new(InfoRequest.holding_pen_request, subject_line, :subject)
end
it { is_expected.to_not include(guess) }
@@ -2148,9 +2128,7 @@
info_request: info_request)
end
- let(:guess) do
- described_class::Guess.new(info_request, subject_line, :subject)
- end
+ let(:guess) { Guess.new(info_request, subject_line, :subject) }
it { is_expected.to include(guess) }
end
@@ -2166,13 +2144,8 @@
FactoryBot.create(:incoming_message, subject: subject_line).info_request
end
- let(:guess_1) do
- described_class::Guess.new(info_request_1, subject_line, :subject)
- end
-
- let(:guess_2) do
- described_class::Guess.new(info_request_2, subject_line, :subject)
- end
+ let(:guess_1) { Guess.new(info_request_1, subject_line, :subject) }
+ let(:guess_2) { Guess.new(info_request_2, subject_line, :subject) }
it { is_expected.to match_array([guess_1, guess_2]) }
end
From 76f9d88e9857ff3691d47a53e21698127c9e0ff2 Mon Sep 17 00:00:00 2001
From: Graeme Porteous
Date: Fri, 4 Aug 2023 11:19:56 +0100
Subject: [PATCH 2/4] Refactor Guess
Allow guesses to be passed different components. These components will
make up key/values used to calculate how good the guess is.
---
app/models/guess.rb | 23 ++-
app/models/info_request.rb | 11 +-
.../admin_raw_email/_holding_pen.html.erb | 4 +-
.../admin_raw_email_controller_spec.rb | 7 +-
spec/models/info_request_spec.rb | 172 +++++++++++++++---
5 files changed, 185 insertions(+), 32 deletions(-)
diff --git a/app/models/guess.rb b/app/models/guess.rb
index 17523695a6..47b1599e49 100644
--- a/app/models/guess.rb
+++ b/app/models/guess.rb
@@ -1,2 +1,23 @@
-class Guess < Struct.new(:info_request, :matched_value, :match_method)
+##
+# A guess at which info request a incoming message should be associated to
+#
+class Guess
+ attr_reader :info_request, :components
+
+ def initialize(info_request, **components)
+ @info_request = info_request
+ @components = components
+ end
+
+ def [](key)
+ components[key]
+ end
+
+ def ==(other)
+ info_request == other.info_request && components == other.components
+ end
+
+ def match_method
+ components.keys.first
+ end
end
diff --git a/app/models/info_request.rb b/app/models/info_request.rb
index 9626c76c29..4c1b25334c 100644
--- a/app/models/info_request.rb
+++ b/app/models/info_request.rb
@@ -244,8 +244,13 @@ def self.guess_by_incoming_email(*emails)
guesses = emails.flatten.reduce([]) do |memo, email|
id, idhash = _extract_id_hash_from_email(email)
id, idhash = _guess_idhash_from_email(email) if idhash.nil? || id.nil?
- memo << Guess.new(find_by_id(id), email, :id)
- memo << Guess.new(find_by_idhash(idhash), email, :idhash)
+
+ memo << Guess.new(
+ find_by_id(id), email: email, id: id, idhash: idhash
+ )
+ memo << Guess.new(
+ find_by_idhash(idhash), email: email, id: id, idhash: idhash
+ )
end
# Unique Guesses where we've found an `InfoRequest`
@@ -314,7 +319,7 @@ def self.guess_by_incoming_subject(subject_line)
limit(25)
guesses = requests.each.reduce([]) do |memo, request|
- memo << Guess.new(request, subject_line, :subject)
+ memo << Guess.new(request, subject: subject_line)
end
# Unique Guesses where we've found an `InfoRequest`
diff --git a/app/views/admin_raw_email/_holding_pen.html.erb b/app/views/admin_raw_email/_holding_pen.html.erb
index 853c22c27e..fd5ec87113 100644
--- a/app/views/admin_raw_email/_holding_pen.html.erb
+++ b/app/views/admin_raw_email/_holding_pen.html.erb
@@ -54,12 +54,12 @@
<%= guess.match_method %>
<% if guess.match_method == :subject %>
because the incoming message has a subject of
- <%= guess.matched_value %>
+ <%= guess[:subject] %>
<% else %>
because it has an incoming email address
of <%= guess.info_request.incoming_email %>
and this incoming message was sent to
- <%= guess.matched_value %>.
+ <%= guess[:email] %>.
<% end %>
diff --git a/spec/controllers/admin_raw_email_controller_spec.rb b/spec/controllers/admin_raw_email_controller_spec.rb
index ec83eacd6b..3bf0025805 100644
--- a/spec/controllers/admin_raw_email_controller_spec.rb
+++ b/spec/controllers/admin_raw_email_controller_spec.rb
@@ -63,7 +63,10 @@
it 'assigns guessed requests based on the hash' do
get :show, params: { id: incoming_message.raw_email.id }
- guess = Guess.new(info_request, invalid_to, :idhash)
+ idhash = InfoRequest.hash_from_id(info_request.id)
+ guess = Guess.new(
+ info_request, email: invalid_to, id: nil, idhash: idhash
+ )
expect(assigns[:guessed_info_requests]).to eq([guess])
end
@@ -72,7 +75,7 @@
FactoryBot.create(:incoming_message, subject: 'Basic Email').
info_request
get :show, params: { id: incoming_message.raw_email.id }
- guess = Guess.new(other_request, 'Basic Email', :subject)
+ guess = Guess.new(other_request, subject: 'Basic Email')
expect(assigns[:guessed_info_requests]).to include(guess)
end
diff --git a/spec/models/info_request_spec.rb b/spec/models/info_request_spec.rb
index 97d908e049..6a0fd5920c 100644
--- a/spec/models/info_request_spec.rb
+++ b/spec/models/info_request_spec.rb
@@ -1910,7 +1910,16 @@
context 'email with an intact id and broken idhash' do
let(:email) { "request-#{ info_request.id }-asdfg@example.com" }
- let(:guess) { Guess.new(info_request, email, :id) }
+
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email,
+ id: info_request.id,
+ idhash: 'asdfg'
+ )
+ end
+
it { is_expected.to include(guess) }
end
@@ -1919,7 +1928,15 @@
"request-#{ info_request.id }ab-#{ info_request.idhash }@example.com"
end
- let(:guess) { Guess.new(info_request, email, :idhash) }
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email,
+ id: nil,
+ idhash: info_request.idhash
+ )
+ end
+
it { is_expected.to include(guess) }
end
@@ -1928,13 +1945,30 @@
"request-a12x3b-#{ info_request.idhash }@example.com"
end
- let(:guess) { Guess.new(info_request, email, :idhash) }
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email,
+ id: nil,
+ idhash: info_request.idhash
+ )
+ end
+
it { is_expected.to include(guess) }
end
context 'upper case email with a broken id and otherwise intact idhash' do
let(:email) { "REQUEST-123a-#{ info_request.idhash.upcase }@example.com" }
- let(:guess) { Guess.new(info_request, email, :idhash) }
+
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email,
+ id: nil,
+ idhash: info_request.idhash
+ )
+ end
+
it { is_expected.to include(guess) }
end
@@ -1943,7 +1977,15 @@
"request#{ info_request.id }#{ info_request.idhash }@example.com"
end
- let(:guess) { Guess.new(info_request, email, :id) }
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email,
+ id: info_request.id,
+ idhash: info_request.idhash
+ )
+ end
+
it { is_expected.to include(guess) }
end
@@ -1952,19 +1994,45 @@
"request-#{ info_request.id }#{ info_request.idhash }@example.com"
end
- let(:guess) { Guess.new(info_request, email, :id) }
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email,
+ id: info_request.id,
+ idhash: info_request.idhash
+ )
+ end
+
it { is_expected.to include(guess) }
end
context 'email with a broken id and an intact idhash but missing punctuation' do
let(:email) { "request123ab#{ info_request.idhash }@example.com" }
- let(:guess) { Guess.new(info_request, email, :idhash) }
+
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email,
+ id: nil,
+ idhash: info_request.idhash
+ )
+ end
+
it { is_expected.to include(guess) }
end
context 'email with an intact id and a broken idhash but missing punctuation' do
let(:email) { "request#{ info_request.id }abcdefgh@example.com" }
- let(:guess) { Guess.new(info_request, email, :id) }
+
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email,
+ id: info_request.id,
+ idhash: 'abcdefgh'
+ )
+ end
+
it { is_expected.to include(guess) }
end
@@ -1972,13 +2040,31 @@
before { InfoRequest.where(id: 1_231_014).destroy_all }
let!(:info_request) { FactoryBot.create(:info_request, id: 1_231_014) }
let(:email) { 'request-123loL4abcdefgh@example.com' }
- let(:guess) { Guess.new(info_request, email, :id) }
+
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email,
+ id: info_request.id,
+ idhash: 'abcdefgh'
+ )
+ end
+
it { is_expected.to include(guess) }
end
context 'email with a broken id and an intact idhash but broken format' do
let(:email) { "reqeust=123ab#{ info_request.idhash }@example.com" }
- let(:guess) { Guess.new(info_request, email, :idhash) }
+
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email,
+ id: nil,
+ idhash: info_request.idhash
+ )
+ end
+
it { is_expected.to include(guess) }
end
@@ -2000,8 +2086,23 @@
"request-#{ info_request_1.id }-#{ info_request_2.idhash }@example.com"
end
- let(:guess_1) { Guess.new(info_request_1, email, :id) }
- let(:guess_2) { Guess.new(info_request_2, email, :idhash) }
+ let(:guess_1) do
+ Guess.new(
+ info_request_1,
+ email: email,
+ id: info_request_1.id,
+ idhash: info_request_2.idhash
+ )
+ end
+
+ let(:guess_2) do
+ Guess.new(
+ info_request_2,
+ email: email,
+ id: info_request_1.id,
+ idhash: info_request_2.idhash
+ )
+ end
it { is_expected.to match_array([guess_1, guess_2]) }
end
@@ -2010,7 +2111,15 @@
let(:email_1) { "request-123ab-#{ info_request.idhash }@example.com" }
let(:email_2) { "request-#{ info_request.id }-asdfg@example.com" }
let(:email) { [email_1, email_2] }
- let(:guess) { Guess.new(info_request, email_1, :idhash) }
+
+ let(:guess) do
+ Guess.new(
+ info_request,
+ email: email_1,
+ id: nil,
+ idhash: info_request.idhash
+ )
+ end
it { is_expected.to match_array([guess]) }
end
@@ -2029,8 +2138,23 @@
let(:email) { [email_1, email_2] }
- let(:guess_1) { Guess.new(info_request_1, email_1, :id) }
- let(:guess_2) { Guess.new(info_request_2, email_1, :idhash) }
+ let(:guess_1) do
+ Guess.new(
+ info_request_1,
+ email: email_1,
+ id: info_request_1.id,
+ idhash: info_request_2.idhash
+ )
+ end
+
+ let(:guess_2) do
+ Guess.new(
+ info_request_2,
+ email: email_1,
+ id: info_request_1.id,
+ idhash: info_request_2.idhash
+ )
+ end
it { is_expected.to match_array([guess_1, guess_2]) }
end
@@ -2049,7 +2173,7 @@
context 'a direct reply to the original request email' do
let(:subject_line) { info_request.email_subject_followup }
- let(:guess) { Guess.new(info_request, subject_line, :subject) }
+ let(:guess) { Guess.new(info_request, subject: subject_line) }
it { is_expected.to include(guess) }
end
@@ -2059,7 +2183,7 @@
info_request.email_subject_followup.gsub('Re: ', 'RE: ')
end
- let(:guess) { Guess.new(info_request, subject_line, :subject) }
+ let(:guess) { Guess.new(info_request, subject: subject_line) }
it { is_expected.to include(guess) }
end
@@ -2077,8 +2201,8 @@
'Re: Freedom of Information request - How many jelly beans?'
end
- let(:guess_1) { Guess.new(info_request_1, subject_line, :subject) }
- let(:guess_2) { Guess.new(info_request_2, subject_line, :subject) }
+ let(:guess_1) { Guess.new(info_request_1, subject: subject_line) }
+ let(:guess_2) { Guess.new(info_request_2, subject: subject_line) }
it { is_expected.to match_array([guess_1, guess_2]) }
end
@@ -2099,7 +2223,7 @@
end
let(:subject_line) { 'Our ref: 12345678' }
- let(:guess) { Guess.new(info_request, subject_line, :subject) }
+ let(:guess) { Guess.new(info_request, subject: subject_line) }
it { is_expected.to include(guess) }
end
@@ -2114,7 +2238,7 @@
end
let(:guess) do
- Guess.new(InfoRequest.holding_pen_request, subject_line, :subject)
+ Guess.new(InfoRequest.holding_pen_request, subject: subject_line)
end
it { is_expected.to_not include(guess) }
@@ -2128,7 +2252,7 @@
info_request: info_request)
end
- let(:guess) { Guess.new(info_request, subject_line, :subject) }
+ let(:guess) { Guess.new(info_request, subject: subject_line) }
it { is_expected.to include(guess) }
end
@@ -2144,8 +2268,8 @@
FactoryBot.create(:incoming_message, subject: subject_line).info_request
end
- let(:guess_1) { Guess.new(info_request_1, subject_line, :subject) }
- let(:guess_2) { Guess.new(info_request_2, subject_line, :subject) }
+ let(:guess_1) { Guess.new(info_request_1, subject: subject_line) }
+ let(:guess_2) { Guess.new(info_request_2, subject: subject_line) }
it { is_expected.to match_array([guess_1, guess_2]) }
end
From 0383cfb110b43daa7869a21f989ecae265d6a040 Mon Sep 17 00:00:00 2001
From: Graeme Porteous
Date: Mon, 7 Aug 2023 12:19:32 +0100
Subject: [PATCH 3/4] Add scoring methods to Guess
Test how close the id/idhash components are to those on the info
request. These scores can then be used to deliver incoming messages to a
request or not.
---
app/models/guess.rb | 18 +++++++++++++
spec/models/guess_spec.rb | 55 +++++++++++++++++++++++++++++++++++++++
2 files changed, 73 insertions(+)
create mode 100644 spec/models/guess_spec.rb
diff --git a/app/models/guess.rb b/app/models/guess.rb
index 47b1599e49..1220f789f6 100644
--- a/app/models/guess.rb
+++ b/app/models/guess.rb
@@ -1,3 +1,5 @@
+require 'text'
+
##
# A guess at which info request a incoming message should be associated to
#
@@ -13,6 +15,16 @@ def [](key)
components[key]
end
+ def id_score
+ return 1 unless self[:id]
+ similarity(self[:id], info_request.id)
+ end
+
+ def idhash_score
+ return 1 unless self[:idhash]
+ similarity(self[:idhash], info_request.idhash)
+ end
+
def ==(other)
info_request == other.info_request && components == other.components
end
@@ -20,4 +32,10 @@ def ==(other)
def match_method
components.keys.first
end
+
+ private
+
+ def similarity(a, b)
+ Text::WhiteSimilarity.similarity(a.to_s, b.to_s)
+ end
end
diff --git a/spec/models/guess_spec.rb b/spec/models/guess_spec.rb
new file mode 100644
index 0000000000..4ca55ccb27
--- /dev/null
+++ b/spec/models/guess_spec.rb
@@ -0,0 +1,55 @@
+require 'spec_helper'
+
+RSpec.describe Guess do
+ let(:info_request) do
+ FactoryBot.create(:info_request, id: 100, idhash: '4e637388')
+ end
+
+ describe 'with a subject line given' do
+ let(:guess) { described_class.new(info_request, subject: 'subject_line') }
+
+ it 'returns an id_score of 1' do
+ expect(guess.id_score).to eq(1)
+ end
+
+ it 'returns an idhash_score of 1' do
+ expect(guess.idhash_score).to eq(1)
+ end
+ end
+
+ describe 'with an id given' do
+ let(:guess_1) { described_class.new(info_request, id: 100) }
+ let(:guess_2) { described_class.new(info_request, id: 456) }
+ let(:guess_3) { described_class.new(info_request, id: 109) }
+
+ it 'returns an id_score of 1 when it is correct' do
+ expect(guess_1.id_score).to eq(1.0)
+ end
+
+ it 'returns an id_score of 0 when there is no similarity' do
+ expect(guess_2.id_score).to eq(0.0)
+ end
+
+ it 'returns a value between 0 and 1 when there is some similarity' do
+ expect(guess_3.id_score).to be_between(0, 1).exclusive
+ end
+ end
+
+ describe 'with an idhash given' do
+ let(:guess_1) { described_class.new(info_request, idhash: '4e637388') }
+ let(:guess_2) { described_class.new(info_request, idhash: '12345678') }
+ let(:guess_3) { described_class.new(info_request, idhash: '4e637399') }
+
+ it 'returns an idhash_score of 1 when it is correct' do
+ expect(guess_1.idhash_score).to eq(1.0)
+ end
+
+ it 'returns an idhash_score of 0 when there is no similarity' do
+ expect(guess_2.idhash_score).to eq(0.0)
+ end
+
+ it 'returns a value between 0 and 1 when there is some similarity' do
+ expect(guess_3.idhash_score).to be_between(0, 1).exclusive
+ end
+ end
+end
From 6c455efd9b0715b7d9de15c47473135f2f9e7a86 Mon Sep 17 00:00:00 2001
From: Alexander Griffen
Date: Fri, 14 Jul 2023 15:43:53 +0100
Subject: [PATCH 4/4] feat: guessed mail don't go to holding pen
Now, the 'Guessing' mechanism will be used when a mail is
received. If there is only one guess produced by this mechanism,
then the mail will go straight to that guess's IR
---
app/mailers/request_mailer.rb | 50 ++++++++++++++++++-----------
app/models/guess.rb | 23 +++++++++++++
spec/integration/admin_spec.rb | 20 ------------
spec/mailers/request_mailer_spec.rb | 19 +++++++++++
spec/models/guess_spec.rb | 40 +++++++++++++++++++++++
5 files changed, 113 insertions(+), 39 deletions(-)
diff --git a/app/mailers/request_mailer.rb b/app/mailers/request_mailer.rb
index fa42e853f9..47daf34ee9 100644
--- a/app/mailers/request_mailer.rb
+++ b/app/mailers/request_mailer.rb
@@ -235,35 +235,47 @@ def requests_matching_email(email)
InfoRequest.matching_incoming_email(addresses)
end
+ def send_to_holding_pen(email, raw_email, opts)
+ opts[:rejected_reason] =
+ _("Could not identify the request from the email address")
+ request = InfoRequest.holding_pen_request
+ request.receive(email, raw_email, opts)
+ end
+
# Member function, called on the new class made in self.receive above
def receive(email, raw_email, source = :mailin)
opts = { source: source }
- # Find which info requests the email is for
- reply_info_requests = requests_matching_email(email)
+ # Only check mail that doesn't have spam in the header
+ return if SpamAddress.spam?(MailHandler.get_all_addresses(email))
- # Nothing found OR multiple different info requests, so save in holding pen
- if reply_info_requests.empty? || reply_info_requests.count > 1
- opts[:rejected_reason] =
- _("Could not identify the request from the email address")
- request = InfoRequest.holding_pen_request
+ # Find exact matches for info requests
+ exact_info_requests = requests_matching_email(email)
- unless SpamAddress.spam?(MailHandler.get_all_addresses(email))
- request.receive(email, raw_email, opts)
- end
- return
+ # Find any guesses for info requests
+ unless exact_info_requests.count == 1
+ guessed_info_requests = Guess.guessed_info_requests(email)
end
- # Send the message to each request, to be archived with it
- reply_info_requests.each do |reply_info_request|
- # If environment variable STOP_DUPLICATES is set, don't send message with same id again
- if ENV['STOP_DUPLICATES']
- if reply_info_request.already_received?(email, raw_email)
- raise "message #{ email.message_id } already received by request"
- end
+ # If there is only one info request matching mail, it gets attached to the
+ # request to be archived with it
+ if exact_info_requests.count == 1 || guessed_info_requests.count == 1
+ info_request = exact_info_requests.first || guessed_info_requests.first
+
+ if exact_info_requests.empty? && guessed_info_requests.count == 1
+ info_request.log_event(
+ 'redeliver_incoming',
+ editor: 'automatic',
+ destination_request: info_request
+ )
end
- reply_info_request.receive(email, raw_email, opts)
+ info_request.receive(email, raw_email, opts)
+
+ else
+ # Otherwise, if there are no matching IRs, multiple IRs, or multiple IR
+ # guesses, we send the mail to the holding pen
+ send_to_holding_pen(email, raw_email, opts)
end
end
diff --git a/app/models/guess.rb b/app/models/guess.rb
index 1220f789f6..f75eafbf70 100644
--- a/app/models/guess.rb
+++ b/app/models/guess.rb
@@ -6,6 +6,29 @@
class Guess
attr_reader :info_request, :components
+ # The percentage similarity the id or idhash much fulfil
+ THRESHOLD = 0.8
+
+ ##
+ # Return InfoRequest which we guess should receive an incoming message based
+ # on a threshold.
+ #
+ def self.guessed_info_requests(email)
+ # Match the email address in the message without matching the hash
+ email_addresses = MailHandler.get_all_addresses(email)
+ guesses = InfoRequest.guess_by_incoming_email(email_addresses)
+
+ guesses_reaching_threshold = guesses.select do |ir_guess|
+ id_score = ir_guess.id_score
+ idhash_score = ir_guess.idhash_score
+
+ (id_score == 1 && idhash_score >= THRESHOLD) ||
+ (id_score >= THRESHOLD && idhash_score == 1)
+ end
+
+ guesses_reaching_threshold.map(&:info_request).uniq
+ end
+
def initialize(info_request, **components)
@info_request = info_request
@components = components
diff --git a/spec/integration/admin_spec.rb b/spec/integration/admin_spec.rb
index e298236a4a..edfc4c0a96 100644
--- a/spec/integration/admin_spec.rb
+++ b/spec/integration/admin_spec.rb
@@ -127,26 +127,6 @@
expect(page).to have_content "Only the authority can reply to this request"
end
end
-
- it "guesses a misdirected request" do
- info_request = FactoryBot.create(:info_request,
- allow_new_responses_from: 'authority_only',
- handle_rejected_responses: 'holding_pen')
- mail_to = "request-#{info_request.id}-asdfg@example.com"
- receive_incoming_mail('incoming-request-plain.email', email_to: mail_to)
- interesting_email = last_holding_pen_mail
-
- # now we add another message to the queue, which we're not interested in
- receive_incoming_mail('incoming-request-plain.email',
- email_to: info_request.incoming_email,
- email_from: "")
- expect(holding_pen_messages.length).to eq(2)
- using_session(@admin) do
- visit admin_raw_email_path interesting_email
- expect(page).to have_content "Could not identify the request"
- expect(page).to have_content info_request.title
- end
- end
end
describe 'generating an upload url' do
diff --git a/spec/mailers/request_mailer_spec.rb b/spec/mailers/request_mailer_spec.rb
index b2563bb73c..3cd72d6b3d 100644
--- a/spec/mailers/request_mailer_spec.rb
+++ b/spec/mailers/request_mailer_spec.rb
@@ -24,6 +24,25 @@
deliveries.clear
end
+ it "should append it to the appropriate request if there is only one guess of information request" do
+ ir = info_requests(:fancy_dog_request)
+ expect(ir.incoming_messages.count).to eq(1) # in the fixture
+ receive_incoming_mail(
+ 'incoming-request-plain.email',
+ email_to: "request-#{ir.id}-#{ir.idhash}a@localhost"
+ )
+ expect(ir.incoming_messages.count).to eq(2) # one more arrives
+ expect(ir.info_request_events[-1].incoming_message_id).not_to be_nil
+ expect(ir.info_request_events[-2].params[:editor]).to eq("automatic")
+
+ deliveries = ActionMailer::Base.deliveries
+ expect(deliveries.size).to eq(1)
+ mail = deliveries[0]
+ # to the user who sent fancy_dog_request
+ expect(mail.to).to eq(['bob@localhost'])
+ deliveries.clear
+ end
+
it "should store mail in holding pen and send to admin when the email is not to any information request" do
ir = info_requests(:fancy_dog_request)
expect(ir.incoming_messages.count).to eq(1)
diff --git a/spec/models/guess_spec.rb b/spec/models/guess_spec.rb
index 4ca55ccb27..d579bba0ae 100644
--- a/spec/models/guess_spec.rb
+++ b/spec/models/guess_spec.rb
@@ -5,6 +5,46 @@
FactoryBot.create(:info_request, id: 100, idhash: '4e637388')
end
+ describe '.guessed_info_requests' do
+ subject(:guesses) { described_class.guessed_info_requests(email) }
+
+ let(:email) do
+ mail = Mail.new
+ mail.to address
+ mail
+ end
+
+ let(:info_request) { FactoryBot.create(:info_request, id: 4566) }
+ let!(:other_info_request) { FactoryBot.create(:info_request) }
+
+ let(:id) { info_request.id }
+ let(:hash) { info_request.idhash }
+
+ context 'with email matching ID and ID hash' do
+ let(:address) { info_request.incoming_email }
+
+ it 'return matching InfoRequest' do
+ is_expected.to match_array([info_request])
+ end
+ end
+
+ context 'with email matching ID and almost ID hash' do
+ let(:address) { "request-#{id}-#{hash[0...-1]}}z@localhost" }
+
+ it 'return guessed InfoRequest' do
+ is_expected.to match_array([info_request])
+ end
+ end
+
+ context 'with email matching ID hash and almost ID' do
+ let(:address) { "request-#{id.to_s[0...-1]}-#{hash}@localhost" }
+
+ it 'return guessed InfoRequest' do
+ is_expected.to match_array([info_request])
+ end
+ end
+ end
+
describe 'with a subject line given' do
let(:guess) { described_class.new(info_request, subject: 'subject_line') }