From 742d208c4df4d965c12fd798c3c4006cd3fb06ff Mon Sep 17 00:00:00 2001 From: AbstractData Date: Mon, 6 Jun 2022 22:31:14 +0800 Subject: [PATCH] Update requests_html.py: fix the bug caused by lxml==4.9.0 (maybe lower?). See issues #469 and #479. --- requests_html.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requests_html.py b/requests_html.py index 48e7fb2..a95276b 100644 --- a/requests_html.py +++ b/requests_html.py @@ -95,7 +95,7 @@ def raw_html(self) -> _RawHTML: if self._html: return self._html else: - return etree.tostring(self.element, encoding='unicode').strip().encode(self.encoding) + return etree.tostring(self.element, encoding='unicode', method='html').strip().encode(self.encoding) @property def html(self) -> _BaseHTML: @@ -105,7 +105,7 @@ def html(self) -> _BaseHTML: if self._html: return self.raw_html.decode(self.encoding, errors='replace') else: - return etree.tostring(self.element, encoding='unicode').strip() + return etree.tostring(self.element, encoding='unicode', method='html').strip() @html.setter def html(self, html: str) -> None: @@ -229,7 +229,7 @@ def find(self, selector: str = "*", *, containing: _Containing = None, clean: bo elements = [] for element in elements_copy: - element.raw_html = lxml_html_tostring(cleaner.clean_html(element.lxml)) + element.raw_html = lxml_html_tostring(cleaner.clean_html(element.lxml,encoding='utf8')) elements.append(element) return _get_first_or_list(elements, first)