From 983954f252c2c935fd9eb5804f7179842cba4a30 Mon Sep 17 00:00:00 2001
From: Ron Rademaker <ron@connectholland.nl>
Date: Wed, 16 May 2018 11:25:28 +0200
Subject: [PATCH 1/4] Increase flexibility of bruteforce signature extraction

---
 talon/signature/bruteforce.py      | 140 ++-------------------
 talon/signature/constants.py       |  34 +++++
 talon/signature/extractor.py       | 195 +++++++++++++++++++++++++++++
 tests/signature/bruteforce_test.py |   4 +-
 4 files changed, 240 insertions(+), 133 deletions(-)
 create mode 100644 talon/signature/extractor.py
diff --git a/talon/signature/bruteforce.py b/talon/signature/bruteforce.py
index e502bab8..c352cfc6 100644
--- a/talon/signature/bruteforce.py
+++ b/talon/signature/bruteforce.py
@@ -1,64 +1,8 @@
 from __future__ import absolute_import
-
-import logging
-
-import regex as re
+from talon.signature.extractor import BruteForceExtractor
 
 from talon.signature.constants import (SIGNATURE_MAX_LINES,
                                        TOO_LONG_SIGNATURE_LINE)
-from talon.utils import get_delimiter
-
-log = logging.getLogger(__name__)
-
-# regex to fetch signature based on common signature words
-RE_SIGNATURE = re.compile(r'''
-               (
-                   (?:
-                       ^[\s]*--*[\s]*[a-z \.]*$
-                       |
-                       ^thanks[\s,!]*$
-                       |
-                       ^regards[\s,!]*$
-                       |
-                       ^cheers[\s,!]*$
-                       |
-                       ^best[ a-z]*[\s,!]*$
-                   )
-                   .*
-               )
-               ''', re.I | re.X | re.M | re.S)
-
-# signatures appended by phone email clients
-RE_PHONE_SIGNATURE = re.compile(r'''
-               (
-                   (?:
-                       ^sent[ ]{1}from[ ]{1}my[\s,!\w]*$
-                       |
-                       ^sent[ ]from[ ]Mailbox[ ]for[ ]iPhone.*$
-                       |
-                       ^sent[ ]([\S]*[ ])?from[ ]my[ ]BlackBerry.*$
-                       |
-                       ^Enviado[ ]desde[ ]mi[ ]([\S]+[ ]){0,2}BlackBerry.*$
-                   )
-                   .*
-               )
-               ''', re.I | re.X | re.M | re.S)
-
-# see _mark_candidate_indexes() for details
-# c - could be signature line
-# d - line starts with dashes (could be signature or list item)
-# l - long line
-RE_SIGNATURE_CANDIDATE = re.compile(r'''
-    (?P<candidate>c+d)[^d]
-    |
-    (?P<candidate>c+d)$
-    |
-    (?P<candidate>c+)
-    |
-    (?P<candidate>d)[^d]
-    |
-    (?P<candidate>d)$
-''', re.I | re.X | re.M | re.S)
 
 
 def extract_signature(msg_body):
@@ -73,46 +17,8 @@ def extract_signature(msg_body):
     >>> extract_signature('Hey man!')
     ('Hey man!', None)
     '''
-    try:
-        # identify line delimiter first
-        delimiter = get_delimiter(msg_body)
-
-        # make an assumption
-        stripped_body = msg_body.strip()
-        phone_signature = None
-
-        # strip off phone signature
-        phone_signature = RE_PHONE_SIGNATURE.search(msg_body)
-        if phone_signature:
-            stripped_body = stripped_body[:phone_signature.start()]
-            phone_signature = phone_signature.group()
-
-        # decide on signature candidate
-        lines = stripped_body.splitlines()
-        candidate = get_signature_candidate(lines)
-        candidate = delimiter.join(candidate)
-
-        # try to extract signature
-        signature = RE_SIGNATURE.search(candidate)
-        if not signature:
-            return (stripped_body.strip(), phone_signature)
-        else:
-            signature = signature.group()
-            # when we splitlines() and then join them
-            # we can lose a new line at the end
-            # we did it when identifying a candidate
-            # so we had to do it for stripped_body now
-            stripped_body = delimiter.join(lines)
-            stripped_body = stripped_body[:-len(signature)]
-
-            if phone_signature:
-                signature = delimiter.join([signature, phone_signature])
-
-            return (stripped_body.strip(),
-                    signature.strip())
-    except Exception:
-        log.exception('ERROR extracting signature')
-        return (msg_body, None)
+    brute_force_extractor = BruteForceExtractor(max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNATURE_LINE)
+    return brute_force_extractor.extract_signature(msg_body)
 
 
 def get_signature_candidate(lines):
@@ -126,26 +32,8 @@ def get_signature_candidate(lines):
     * not include more than one line that starts with dashes
     """
     # non empty lines indexes
-    non_empty = [i for i, line in enumerate(lines) if line.strip()]
-
-    # if message is empty or just one line then there is no signature
-    if len(non_empty) <= 1:
-        return []
-
-    # we don't expect signature to start at the 1st line
-    candidate = non_empty[1:]
-    # signature shouldn't be longer then SIGNATURE_MAX_LINES
-    candidate = candidate[-SIGNATURE_MAX_LINES:]
-
-    markers = _mark_candidate_indexes(lines, candidate)
-    candidate = _process_marked_candidate_indexes(candidate, markers)
-
-    # get actual lines for the candidate instead of indexes
-    if candidate:
-        candidate = lines[candidate[0]:]
-        return candidate
-
-    return []
+    brute_force_extractor = BruteForceExtractor(max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNATURE_LINE)
+    return brute_force_extractor._get_signature_candidate(lines)
 
 
 def _mark_candidate_indexes(lines, candidate):
@@ -161,18 +49,8 @@ def _mark_candidate_indexes(lines, candidate):
     'cdc'
     """
     # at first consider everything to be potential signature lines
-    markers = list('c' * len(candidate))
-
-    # mark lines starting from bottom up
-    for i, line_idx in reversed(list(enumerate(candidate))):
-        if len(lines[line_idx].strip()) > TOO_LONG_SIGNATURE_LINE:
-            markers[i] = 'l'
-        else:
-            line = lines[line_idx].strip()
-            if line.startswith('-') and line.strip("-"):
-                markers[i] = 'd'
-
-    return "".join(markers)
+    brute_force_extractor = BruteForceExtractor(max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNATURE_LINE)
+    return brute_force_extractor._mark_candidate_indexes(lines, candidate)
 
 
 def _process_marked_candidate_indexes(candidate, markers):
@@ -183,5 +61,5 @@ def _process_marked_candidate_indexes(candidate, markers):
     >>> _process_marked_candidate_indexes([9, 12, 14, 15, 17], 'clddc')
     [15, 17]
     """
-    match = RE_SIGNATURE_CANDIDATE.match(markers[::-1])
-    return candidate[-match.end('candidate'):] if match else []
+    brute_force_extractor = BruteForceExtractor(max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNATURE_LINE)
+    return brute_force_extractor._process_marked_candidate_indexes(candidate, markers)
diff --git a/talon/signature/constants.py b/talon/signature/constants.py
index 14f2006c..66d72bfa 100644
--- a/talon/signature/constants.py
+++ b/talon/signature/constants.py
@@ -1,2 +1,36 @@
+import regex as re
+
 SIGNATURE_MAX_LINES = 11
 TOO_LONG_SIGNATURE_LINE = 60
+
+# signatures appended by phone email clients
+RE_PHONE_SIGNATURE = re.compile(r'''
+               (
+                   (?:
+                       ^sent[ ]{1}from[ ]{1}my[\s,!\w]*$
+                       |
+                       ^sent[ ]from[ ]Mailbox[ ]for[ ]iPhone.*$
+                       |
+                       ^sent[ ]([\S]*[ ])?from[ ]my[ ]BlackBerry.*$
+                       |
+                       ^Enviado[ ]desde[ ]mi[ ]([\S]+[ ]){0,2}BlackBerry.*$
+                   )
+                   .*
+               )
+               ''', re.I | re.X | re.M | re.S)
+
+# see _mark_candidate_indexes() for details
+# c - could be signature line
+# d - line starts with dashes (could be signature or list item)
+# l - long line
+RE_SIGNATURE_CANDIDATE = re.compile(r'''
+    (?P<candidate>c+d)[^d]
+    |
+    (?P<candidate>c+d)$
+    |
+    (?P<candidate>c+)
+    |
+    (?P<candidate>d)[^d]
+    |
+    (?P<candidate>d)$
+''', re.I | re.X | re.M | re.S)
\ No newline at end of file
diff --git a/talon/signature/extractor.py b/talon/signature/extractor.py
new file mode 100644
index 00000000..f3b143c3
--- /dev/null
+++ b/talon/signature/extractor.py
@@ -0,0 +1,195 @@
+"""
+Module with object oriented approach to
+signature extractions. Built to be more
+flexible and to support more languages.
+"""
+from __future__ import absolute_import
+import re
+import logging
+
+from abc import ABC, abstractmethod
+from talon.utils import get_delimiter
+from talon.signature.constants import (SIGNATURE_MAX_LINES,
+                                       TOO_LONG_SIGNATURE_LINE,
+                                       RE_SIGNATURE_CANDIDATE,
+                                       RE_PHONE_SIGNATURE)
+
+log = logging.getLogger(__name__)
+
+# Default sign-off patterns (from bruteforce.py); raw strings keep regex escapes valid
+DEFAULT_GREETINGS = (
+    r'[\s]*--*[\s]*[a-z \.]',
+    r'thanks[\s,!]',
+    r'regards[\s,!]',
+    r'cheers[\s,!]',
+    r'best[ a-z]*[\s,!]'
+)
+
+
+class AbstractExtractor(ABC):
+    """
+    Abstract base class for
+    signature extractors.
+    """
+
+    @abstractmethod
+    def extract_signature(self, message: str):
+        """
+        Extract the signature from
+        message and return the
+        text and signature
+
+        :param message: str
+        :return: (text: str, signature: str)
+        """
+        pass
+
+
+class BruteForceExtractor(AbstractExtractor):
+    """
+    Brute force signature extractor.
+    More flexible OO approach to
+    talon.signature.bruteforce.extract_signature
+    """
+
+    def __init__(self, max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNATURE_LINE,
+                 greetings=DEFAULT_GREETINGS):
+        """
+        Create a new brute force extractor. Allows override
+        max signature length, max signature line length and
+        common greetings (allows multi language support).
+        """
+        self.max_lines = max_lines
+        self.max_line_length = max_line_length
+        self._compile_greetings(greetings)
+
+    def extract_signature(self, msg_body: str):
+        """
+        Use brute force to extract the
+        signature (ie. regex and
+        string matching)
+
+        :param message: str
+        :return: (text: str, signature: str)
+        """
+        try:
+            # identify line delimiter first
+            delimiter = get_delimiter(msg_body)
+
+            # make an assumption
+            stripped_body = msg_body.strip()
+            phone_signature = None
+
+            # strip off phone signature
+            phone_signature = RE_PHONE_SIGNATURE.search(msg_body)
+            if phone_signature:
+                stripped_body = stripped_body[:phone_signature.start()]
+                phone_signature = phone_signature.group()
+
+            # decide on signature candidate
+            lines = stripped_body.splitlines()
+            candidate = self._get_signature_candidate(lines)
+            candidate = delimiter.join(candidate)
+
+            # try to extract signature
+            signature = self.re_signature.search(candidate)
+            if not signature:
+                return (stripped_body.strip(), phone_signature)
+            else:
+                signature = signature.group()
+                # when we splitlines() and then join them
+                # we can lose a new line at the end
+                # we did it when identifying a candidate
+                # so we had to do it for stripped_body now
+                stripped_body = delimiter.join(lines)
+                stripped_body = stripped_body[:-len(signature)]
+
+                if phone_signature:
+                    signature = delimiter.join([signature, phone_signature])
+
+                return (stripped_body.strip(),
+                        signature.strip())
+        except Exception:
+            log.exception('ERROR extracting signature')
+            return (msg_body, None)
+
+    def _compile_greetings(self, greetings):
+        """
+        Init the regex to detect the
+        greeting based on the passed
+        greetings
+
+        :param greetings:
+        """
+        greetings = ['^{}*$'.format(greeting) for greeting in greetings]
+        greetings = '|'.join(greetings)
+        self.re_signature = re.compile(r'((?:{}).*)'.format(greetings), re.I | re.X | re.M | re.S)
+
+    def _get_signature_candidate(self, lines):
+        """Return lines that could hold signature
+
+        The lines should:
+
+        * be among last SIGNATURE_MAX_LINES non-empty lines.
+        * not include first line
+        * be shorter than TOO_LONG_SIGNATURE_LINE
+        * not include more than one line that starts with dashes
+        """
+        # non empty lines indexes
+        non_empty = [i for i, line in enumerate(lines) if line.strip()]
+
+        # if message is empty or just one line then there is no signature
+        if len(non_empty) <= 1:
+            return []
+
+        # we don't expect signature to start at the 1st line
+        candidate = non_empty[1:]
+        # signature shouldn't be longer than self.max_lines
+        candidate = candidate[-self.max_lines:]
+
+        markers = self._mark_candidate_indexes(lines, candidate)
+        candidate = self._process_marked_candidate_indexes(candidate, markers)
+
+        # get actual lines for the candidate instead of indexes
+        if candidate:
+            candidate = lines[candidate[0]:]
+            return candidate
+
+        return []
+
+    def _mark_candidate_indexes(self, lines, candidate):
+        """Mark candidate indexes with markers
+
+        Markers:
+
+        * c - line that could be a signature line
+        * l - long line
+        * d - line that starts with dashes but has other chars as well
+
+        >>> _mark_candidate_indexes(['Some text', '', '-Bob', 'Sales'], [0, 2, 3])
+        'cdc'
+        """
+        # at first consider everything to be potential signature lines
+        markers = list('c' * len(candidate))
+
+        # mark lines starting from bottom up
+        for i, line_idx in reversed(list(enumerate(candidate))):
+            if len(lines[line_idx].strip()) > self.max_line_length:
+                markers[i] = 'l'
+            else:
+                line = lines[line_idx].strip()
+                if line.startswith('-') and line.strip("-"):
+                    markers[i] = 'd'
+
+        return "".join(markers)
+
+    def _process_marked_candidate_indexes(self, candidate, markers):
+        """
+        Run regexes against candidate's marked indexes to strip
+        signature candidate.
+
+        >>> _process_marked_candidate_indexes([9, 12, 14, 15, 17], 'clddc')
+        [15, 17]
+        """
+        match = RE_SIGNATURE_CANDIDATE.match(markers[::-1])
+        return candidate[-match.end('candidate'):] if match else []
diff --git a/tests/signature/bruteforce_test.py b/tests/signature/bruteforce_test.py
index 382615bb..0d3221ff 100644
--- a/tests/signature/bruteforce_test.py
+++ b/tests/signature/bruteforce_test.py
@@ -4,7 +4,7 @@
 from .. import *
 
 from talon.signature import bruteforce
-
+from talon.signature import extractor
 
 def test_empty_body():
     eq_(('', None), bruteforce.extract_signature(''))
@@ -135,7 +135,7 @@ def test_blackberry_signature():
         bruteforce.extract_signature(msg_body))
 
 
-@patch.object(bruteforce, 'get_delimiter', Mock(side_effect=Exception()))
+@patch.object(extractor, 'get_delimiter', Mock(side_effect=Exception()))
 def test_crash_in_extract_signature():
     msg_body = '''Hey!
 -roman'''

From 7f00af73d1f00d2c4b0cc617973dea59bfd688b3 Mon Sep 17 00:00:00 2001
From: Ron Rademaker <ron@connectholland.nl>
Date: Tue, 14 Aug 2018 10:10:48 +0200
Subject: [PATCH 2/4] Fix syntax errors

---
 talon/signature/data/classifier | Bin 608 -> 729 bytes
 talon/signature/extractor.py    |   4 ++--
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/talon/signature/data/classifier b/talon/signature/data/classifier
index 1c3a4b0865f3e951b1b3b17fb31bacc48d8d005b..c5c8a706120a2093339a412699fb8f177c981cc3 100644
GIT binary patch
literal 729
zcmZ`%O>Yx15Z!ds4>m2dd{fFtp+E}MY6?O^LIM<|N-XT5NX0kHyK%Y(uXo4xhUO3n
zF3?}akKu&GkAR62azXfD&Cbkw^WJzrhnfAT1UH)4K|wObtra$UBQ@sU9d;uKw!c7Q
z1aocL41z$F)<Q^EFtku}BYG^C`ba>NHZa$P`TZAL7~r8xcuffzEX?MJV<(K}%90Od
zs$@!ZT~tF>$qa?CtSl<-qU6;}WBBmB;Z-F}1dFI*X#}U*p4HZ=Cx$5V;7IrQxIKcF
zulJ=Anim3=>DpfrvM+^UyB(sdA*|FXUBt>jn6$J4PRD`olp3{VqH@e;T%j>I6F0o^
zUd<J(o_Obd>Da6j)=vD)f*&)9XgK@!XXweb|7LJ*B9dgK5u~V<lM}Jy5gfgppPJ~o
zVI?BCK<7}Id<+-kAZ~nvO9ybd9fy9u&)`auB(Hzt@mCx;SWmmn?_m(Y)vvH|0N4EN
z3jHfLgzKJ6?ZFM&LRLSWO?G#aA}A;a1~%z3MuzKNSupFk6L2f>RaYvwz5jgu$&X(f
z-5olQZCNS$47T#|i`*H(-F6=3t^D6b@A*aF!$n7knyrgL+e5cUaQ}bMM;`hBp<`WM
lE&C+oQSRVjLRT@zravvi2eh)(utOIxy-PYzNe4)sl0TCt-i!bM

delta 426
zcmZ9IyG{a85Qg_gSQk(+5sl&<6`~SvF?MP&p&(L_Ol(Znvv6QfvbQ;V7Nsz1qqc+x
z@eO<rJ7Z<yOSoXAQ_lREneTf~d}zvp(pO3)&>TMuQH4y15#h9D+RW=v)IhFaqIz18
zMg@BRdY2jX^E1v#?~;=spxnS@=@Ioi94mqzMpn}y@hYR~MQ+eDycQW|4NQUFW)Ag;
zOEC@EQHr%0<>u~Q*$aA@i7ZHVDYyKP;&@Gh>|jEk-Uh7|5eFw<Ak${TWS*eBNdtk|
z7tmdDYqGe3Qx{(<BnMaW@iZvx5OX!DCO_j$180jh739IIoVy3@O_d~^f5LnN7a$qL
z-#DU&i=a6BxCALTas)Gf0hU4ayBt>_&576YT{9F!P+YBpDtrgmswZ<tpWpe)4kVgB
zZJSuIhaT4NaHD7&cFO(-xnCX|Qf|hS#=utcTYnuKj2vvm)AZ=Rc|%#p7PwuHtnv@q
Cdx(ty

diff --git a/talon/signature/extractor.py b/talon/signature/extractor.py
index f3b143c3..2280b474 100644
--- a/talon/signature/extractor.py
+++ b/talon/signature/extractor.py
@@ -33,7 +33,7 @@ class AbstractExtractor(ABC):
     """
 
     @abstractmethod
-    def extract_signature(self, message: str):
+    def extract_signature(self, message):
         """
         Extract the signature from
         message and return the
@@ -63,7 +63,7 @@ def __init__(self, max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNA
         self.max_line_length = max_line_length
         self._compile_greetings(greetings)
 
-    def extract_signature(self, msg_body: str):
+    def extract_signature(self, msg_body):
         """
         Use brute force to extract the
         signature (ie. regex and

From 1398ae587c193e086415d54bc2ea345eb7f38324 Mon Sep 17 00:00:00 2001
From: Ron Rademaker <ron@connectholland.nl>
Date: Tue, 14 Aug 2018 10:15:23 +0200
Subject: [PATCH 3/4] Skip default parameters

---
 talon/signature/bruteforce.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/talon/signature/bruteforce.py b/talon/signature/bruteforce.py
index c352cfc6..b61a0ea1 100644
--- a/talon/signature/bruteforce.py
+++ b/talon/signature/bruteforce.py
@@ -17,7 +17,7 @@ def extract_signature(msg_body):
     >>> extract_signature('Hey man!')
     ('Hey man!', None)
     '''
-    brute_force_extractor = BruteForceExtractor(max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNATURE_LINE)
+    brute_force_extractor = BruteForceExtractor()
     return brute_force_extractor.extract_signature(msg_body)
 
 
@@ -32,7 +32,7 @@ def get_signature_candidate(lines):
     * not include more than one line that starts with dashes
     """
     # non empty lines indexes
-    brute_force_extractor = BruteForceExtractor(max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNATURE_LINE)
+    brute_force_extractor = BruteForceExtractor()
     return brute_force_extractor._get_signature_candidate(lines)
 
 
@@ -49,7 +49,7 @@ def _mark_candidate_indexes(lines, candidate):
     'cdc'
     """
     # at first consider everything to be potential signature lines
-    brute_force_extractor = BruteForceExtractor(max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNATURE_LINE)
+    brute_force_extractor = BruteForceExtractor()
     return brute_force_extractor._mark_candidate_indexes(lines, candidate)
 
 
@@ -61,5 +61,5 @@ def _process_marked_candidate_indexes(candidate, markers):
     >>> _process_marked_candidate_indexes([9, 12, 14, 15, 17], 'clddc')
     [15, 17]
     """
-    brute_force_extractor = BruteForceExtractor(max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNATURE_LINE)
+    brute_force_extractor = BruteForceExtractor()
     return brute_force_extractor._process_marked_candidate_indexes(candidate, markers)

From be8097c9a64d7139cedccd01f1999b5c0eb3730c Mon Sep 17 00:00:00 2001
From: Ron Rademaker <ron@connectholland.nl>
Date: Tue, 14 Aug 2018 10:17:37 +0200
Subject: [PATCH 4/4] Comment line lengths

---
 talon/signature/extractor.py | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/talon/signature/extractor.py b/talon/signature/extractor.py
index 2280b474..d93d846c 100644
--- a/talon/signature/extractor.py
+++ b/talon/signature/extractor.py
@@ -1,6 +1,5 @@
 """
-Module with object oriented approach to
-signature extractions. Built to be more
+Module with object oriented approach to signature extractions. Built to be more
 flexible and to support more languages.
 """
 from __future__ import absolute_import
@@ -28,16 +27,13 @@
 
 class AbstractExtractor(ABC):
     """
-    Abstract base class for
-    signature extractors.
+    Abstract base class for signature extractors.
     """
 
     @abstractmethod
     def extract_signature(self, message):
         """
-        Extract the signature from
-        message and return the
-        text and signature
+        Extract the signature from message and return the text and signature
 
         :param message: str
         :return: (text: str, signature: str)
@@ -55,9 +51,8 @@ class BruteForceExtractor(AbstractExtractor):
     def __init__(self, max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNATURE_LINE,
                  greetings=DEFAULT_GREETINGS):
         """
-        Create a new brute force extractor. Allows override
-        max signature length, max signature line length and
-        common greetings (allows multi language support).
+        Create a new brute force extractor. Allows overriding max signature length,
+        max signature line length and common greetings (allows multi language support).
         """
         self.max_lines = max_lines
         self.max_line_length = max_line_length
@@ -65,9 +60,7 @@ def __init__(self, max_lines=SIGNATURE_MAX_LINES, max_line_length=TOO_LONG_SIGNA
 
     def extract_signature(self, msg_body):
         """
-        Use brute force to extract the
-        signature (ie. regex and
-        string matching)
+        Use brute force to extract the signature (ie. regex and string matching)
 
         :param message: str
         :return: (text: str, signature: str)
@@ -97,10 +90,8 @@ def extract_signature(self, msg_body):
                 return (stripped_body.strip(), phone_signature)
             else:
                 signature = signature.group()
-                # when we splitlines() and then join them
-                # we can lose a new line at the end
-                # we did it when identifying a candidate
-                # so we had to do it for stripped_body now
+                # when we splitlines() and then join them we can lose a new line at the end
+                # we did it when identifying a candidate so we had to do it for stripped_body now
                 stripped_body = delimiter.join(lines)
                 stripped_body = stripped_body[:-len(signature)]
 
@@ -185,8 +176,7 @@ def _mark_candidate_indexes(self, lines, candidate):
 
     def _process_marked_candidate_indexes(self, candidate, markers):
         """
-        Run regexes against candidate's marked indexes to strip
-        signature candidate.
+        Run regexes against candidate's marked indexes to strip signature candidate.
 
         >>> _process_marked_candidate_indexes([9, 12, 14, 15, 17], 'clddc')
         [15, 17]