From 31cd2c01ddff21ca7108ede94518a593899a6224 Mon Sep 17 00:00:00 2001 From: Ndipbanyan Date: Sat, 24 Apr 2021 16:05:47 +0100 Subject: [PATCH 1/3] filter unsubscribe emails --- script/rss2html.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/script/rss2html.ml b/script/rss2html.ml index a5ad663c1..5994bc545 100644 --- a/script/rss2html.ml +++ b/script/rss2html.ml @@ -589,7 +589,7 @@ let delete_author title = (* Remove the "[Caml-list]" and possible "Re:". *) let caml_list_re = Str.regexp_case_fold "^\\(Re: *\\)*\\(\\[[a-zA-Z0-9-]+\\] *\\)*" - +let unsubscribe_email_re= Str.regexp_case_fold ".*unsubscribe.*" (** [email_threads] does basically the same as [headlines] but filter the posts to have repeated subjects. It also presents the subject better. *) @@ -609,6 +609,7 @@ let email_threads ?n ~l9n url = let must_keep (e: Atom.entry) = let title = string_of_text_construct e.Atom.title in if S.mem title !seen then false + else if Str.string_match unsubscribe_email_re title 0 then false else (seen := S.add title !seen; true) in let posts = List.filter must_keep posts in let posts = (match n with From 96a018b04e2e23948ab3388b22372e84930d17ee Mon Sep 17 00:00:00 2001 From: Ndipbanyan Date: Sat, 24 Apr 2021 17:52:35 +0100 Subject: [PATCH 2/3] better formatting --- script/rss2html.ml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/script/rss2html.ml b/script/rss2html.ml index 5994bc545..d7af4882f 100644 --- a/script/rss2html.ml +++ b/script/rss2html.ml @@ -589,7 +589,11 @@ let delete_author title = (* Remove the "[Caml-list]" and possible "Re:". *) let caml_list_re = Str.regexp_case_fold "^\\(Re: *\\)*\\(\\[[a-zA-Z0-9-]+\\] *\\)*" -let unsubscribe_email_re= Str.regexp_case_fold ".*unsubscribe.*" + + (*Remove the unsubscribe emails*) +let unsubscribe_email_re = + Str.regexp_case_fold ".*unsubscribe.*" + (** [email_threads] does basically the same as [headlines] but filter the posts to have repeated subjects. It also presents the subject better. *) @@ -603,7 +607,7 @@ let email_threads ?n ~l9n url = let title = delete_author title in { e with Atom.title = Atom.Text title } in let posts = List.map normalize_title posts in - (* Keep only the more recent post of redundant subjects. *) + (* Keep only the more recent post of redundant subjects filter out the unsubscribe emails *) let module S = Set.Make(String) in let seen = ref S.empty in let must_keep (e: Atom.entry) = From cc04b0d3d3cd64c0a3b5eb2acb8444cf57654f12 Mon Sep 17 00:00:00 2001 From: Ndipbanyan Date: Sat, 24 Apr 2021 19:20:53 +0100 Subject: [PATCH 3/3] better formatting --- script/rss2html.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/rss2html.ml b/script/rss2html.ml index d7af4882f..a4dbf3faa 100644 --- a/script/rss2html.ml +++ b/script/rss2html.ml @@ -607,7 +607,7 @@ let email_threads ?n ~l9n url = let title = delete_author title in { e with Atom.title = Atom.Text title } in let posts = List.map normalize_title posts in - (* Keep only the more recent post of redundant subjects filter out the unsubscribe emails *) + (* Keep only the more recent post of redundant subjects and filter out the unsubscribe emails *) let module S = Set.Make(String) in let seen = ref S.empty in let must_keep (e: Atom.entry) =