From 651f906b9a6bef2488bdec0270a395235a042576 Mon Sep 17 00:00:00 2001 From: Jack Cushman Date: Mon, 2 Aug 2021 15:59:47 -0400 Subject: [PATCH 1/2] Escape entities in html() output --- pyquery/pyquery.py | 5 +++-- tests/test_pyquery.py | 12 ++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pyquery/pyquery.py b/pyquery/pyquery.py index e549f0e..976a158 100644 --- a/pyquery/pyquery.py +++ b/pyquery/pyquery.py @@ -8,6 +8,7 @@ from .openers import url_opener from .text import extract_text from copy import deepcopy +from html import escape from lxml import etree import lxml.html import inspect @@ -1085,9 +1086,9 @@ def html(self, value=no_default, **kwargs): return None tag = self[0] children = tag.getchildren() + html = escape(tag.text or '', quote=False) if not children: - return tag.text or '' - html = tag.text or '' + return html if 'encoding' not in kwargs: kwargs['encoding'] = str html += u''.join([etree.tostring(e, **kwargs) diff --git a/tests/test_pyquery.py b/tests/test_pyquery.py index d82a556..340e11e 100644 --- a/tests/test_pyquery.py +++ b/tests/test_pyquery.py @@ -534,9 +534,10 @@ def test_val_for_textarea(self): self.assertEqual(d('#textarea-multi').val(), multi_expected) self.assertEqual(d('#textarea-multi').text(), multi_expected) multi_new = '''Bacon\n<b>Eggs</b>\nSpam''' + multi_new_expected = '''Bacon\n&lt;b&gt;Eggs&lt;/b&gt;\nSpam''' d('#textarea-multi').val(multi_new) - self.assertEqual(d('#textarea-multi').val(), multi_new) - self.assertEqual(d('#textarea-multi').text(), multi_new) + self.assertEqual(d('#textarea-multi').val(), multi_new_expected) + self.assertEqual(d('#textarea-multi').text(), multi_new_expected) def test_val_for_select(self): d = pq(self.html4) @@ -622,6 +623,13 @@ def test_html_replacement(self): self.assertEqual(new_html, expected) self.assertIn(replacement, new_html) + def test_html_escape(self): + inner_html = 'encoded &lt;script&gt; tag with "quotes".' \ + 'nested &lt;tag&gt;' + html = '<div>' + inner_html + '</div>' + d = pq(html) + self.assertEqual(d.html(), inner_html) + class TestAjax(TestCase): From 9538ff673fd9989d3e584115a07cbb2c1f6a8580 Mon Sep 17 00:00:00 2001 From: Jack Cushman Date: Thu, 5 Aug 2021 16:25:32 -0400 Subject: [PATCH 2/2] Update changelog --- CHANGES.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 971d377..3e0901e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -3,6 +3,8 @@ - Add nextUntil method +- Fix escaping of top-level element text in ``.html()`` output + 1.4.3 (2020-11-21) ------------------