mostly_img: strip <pre>/<code> blocks before measuring image content.

The post length is captured in s_len_orig before anything is stripped.
<pre>...</pre> and <code>...</code> spans are then replaced with short
placeholders, so their contents are not passed to len_img_block().  The
ratio compared against IMG_TXT_R_THRES (and reported in the reason string)
is s_len_img divided by the original, unstripped length.

diff --git a/findspam.py b/findspam.py
index 7420ac0c21..5cfabc9b1f 100644
--- a/findspam.py
+++ b/findspam.py
@@ -608,12 +608,19 @@ def len_img_block(string):
 # max_score=2 to prevent voting fraud
 @create_rule("post is mostly images", title=False, max_rep=201, max_score=2)
 def mostly_img(s, site):
-    if len(s) == 0:
+    s_len_orig = len(s)
+    if s_len_orig == 0:
         return False, ""
 
+    # Strip code blocks manually. This should be removed once feature
+    # https://chat.stackexchange.com/transcript/message/54842978
+    # get implemented.
+    s = regex.sub("(?s)<pre>.*?</pre>", "\nstripped pre\n", s)
+    s = regex.sub("(?s)<code>.*?</code>", "\nstripped code\n", s)
+
     s_len_img = len_img_block(s)
-    if s_len_img / len(s) > IMG_TXT_R_THRES:
-        return True, "{:.4f} of the post is html image blocks".format(s_len_img / len(s))
+    if s_len_img / s_len_orig > IMG_TXT_R_THRES:
+        return True, "{:.4f} of the post is html image blocks".format(s_len_img / s_len_orig)
     return False, ""
 
 