Better parsing for Lords Amendments

Rather than just parsing it all into a single line of text, parse all the paragraphs and indents so that we try to retain a bit more structure.
mysociety · Mar 16, 2017 · d511b3e · d511b3e
1 parent 9c63f44
commit d511b3e
Showing 1 changed file with 11 additions and 6 deletions.
diff --git a/pyscraper/new_hansard.py b/pyscraper/new_hansard.py
@@ -1663,12 +1663,17 @@ def parse_tabledby(self, tabledby):
         )
 
     def parse_amendment(self, amendment):
-        self.parse_para_with_member(
-            amendment,
-            None,
-            css_class='italic',
-            pwmotiontext='unrecognized'
-        )
+        # Amendments are often things like:
+        #
+        # <Amendment><hs_quote><B>54:</B>
+        # Clause 67, page 30, line 9, leave out “high” and insert
+        # “higher”</hs_quote></Amendment>
+        #
+        # so we need to parse the tags to make sure we get the
+        # indenting etc
+        for tag in amendment.getchildren():
+            tag_name = self.get_tag_name_no_ns(tag)
+            self.handle_tag(tag_name, tag)
 
     def parse_clause_heading(self, heading):
         tag = etree.Element('p')