Skip to content

Commit

Permalink
better parsing for Lords Amendments
Browse files Browse the repository at this point in the history
Rather than just parsing it all into a single line of text, parse all the
paragraphs and indents so that we try to retain a bit more structure.
  • Loading branch information
struan committed Mar 16, 2017
1 parent 9c63f44 commit d511b3e
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions pyscraper/new_hansard.py
Original file line number Diff line number Diff line change
Expand Up @@ -1663,12 +1663,17 @@ def parse_tabledby(self, tabledby):
)

def parse_amendment(self, amendment):
# Parse an amendment element.
#
# NOTE(review): this text is a rendered diff hunk with the +/- markers and
# the indentation stripped. The commit header reports 11 additions and 6
# deletions, which matches the six-line parse_para_with_member call below
# being the removed code and the comment-plus-loop after it being its
# replacement -- confirm against the repository before relying on this.
self.parse_para_with_member(
amendment,
None,
css_class='italic',
pwmotiontext='unrecognized'
)
# Amendments are often things like:
#
# <Amendment><hs_quote><B>54:</B>
# Clause 67, page 30, line 9, leave out “high” and insert
# “higher”</hs_quote></Amendment>
#
# so we need to parse the tags to make sure we get the
# indenting etc
#
# Each direct child of the amendment is passed to handle_tag together with
# the name returned by get_tag_name_no_ns (presumably the tag name with its
# XML namespace removed; that helper is not shown here).
# NOTE(review): getchildren() is deprecated in lxml and was removed from
# xml.etree.ElementTree in Python 3.9; iterating the element directly
# yields the same children -- check which etree this module uses.
for tag in amendment.getchildren():
tag_name = self.get_tag_name_no_ns(tag)
self.handle_tag(tag_name, tag)

def parse_clause_heading(self, heading):
tag = etree.Element('p')
Expand Down

0 comments on commit d511b3e

Please sign in to comment.