Skip to content

Commit 3511c2e

Browse files
[3.11] gh-135661: Fix parsing attributes with whitespaces around the "=" separator in HTMLParser (GH-136908) (GH-136920)
This fixes a regression introduced in GH-135930.

(cherry picked from commit dee6501)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent: 228509e; commit: 3511c2e

File tree

3 files changed, with 18 additions (+18) and 19 deletions (-19).

3 files changed

+18
-19
lines changed

Lib/html/parser.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@
4343
(
4444
(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
4545
)
46-
(= # value indicator
46+
([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
4747
('[^']*' # LITA-enclosed value
4848
|"[^"]*" # LIT-enclosed value
4949
|(?!['"])[^>\t\n\r\f ]* # bare value
@@ -55,7 +55,7 @@
5555
[a-zA-Z][^\t\n\r\f />]* # tag name
5656
[\t\n\r\f /]* # optional whitespace before attribute name
5757
(?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
58-
(?:= # value indicator
58+
(?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
5959
(?:'[^']*' # LITA-enclosed value
6060
|"[^"]*" # LIT-enclosed value
6161
|(?!['"])[^>\t\n\r\f ]* # bare value

Lib/test/test_htmlparser.py

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -595,7 +595,7 @@ def test_correct_detection_of_start_tags(self):
595595

596596
html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
597597
expected = [
598-
('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]),
598+
('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]),
599599
('starttag', 'b', []),
600600
('data', 'The '),
601601
('starttag', 'a', [('href', 'some_url')]),
@@ -751,12 +751,12 @@ def test_attr_syntax(self):
751751
]
752752
self._run_check("""<a b='v' c="v" d=v e>""", output)
753753
self._run_check("<a foo==bar>", [('starttag', 'a', [('foo', '=bar')])])
754-
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
755-
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
754+
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', 'bar')])])
755+
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', 'bar')])])
756756
self._run_check("<a foo\v=bar>", [('starttag', 'a', [('foo\v', 'bar')])])
757757
self._run_check("<a foo\xa0=bar>", [('starttag', 'a', [('foo\xa0', 'bar')])])
758-
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
759-
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
758+
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', 'bar')])])
759+
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', 'bar')])])
760760
self._run_check("<a foo=\vbar>", [('starttag', 'a', [('foo', '\vbar')])])
761761
self._run_check("<a foo=\xa0bar>", [('starttag', 'a', [('foo', '\xa0bar')])])
762762

@@ -767,8 +767,8 @@ def test_attr_values(self):
767767
("d", "\txyz\n")])])
768768
self._run_check("""<a b='' c="">""",
769769
[("starttag", "a", [("b", ""), ("c", "")])])
770-
self._run_check("<a b=\t c=\n>",
771-
[("starttag", "a", [("b", ""), ("c", "")])])
770+
self._run_check("<a b=\tx c=\ny>",
771+
[('starttag', 'a', [('b', 'x'), ('c', 'y')])])
772772
self._run_check("<a b=\v c=\xa0>",
773773
[("starttag", "a", [("b", "\v"), ("c", "\xa0")])])
774774
# Regression test for SF patch #669683.
@@ -837,13 +837,17 @@ def test_malformed_attributes(self):
837837
)
838838
expected = [
839839
('starttag', 'a', [('href', "test'style='color:red;bad1'")]),
840-
('data', 'test - bad1'), ('endtag', 'a'),
840+
('data', 'test - bad1'),
841+
('endtag', 'a'),
841842
('starttag', 'a', [('href', "test'+style='color:red;ba2'")]),
842-
('data', 'test - bad2'), ('endtag', 'a'),
843+
('data', 'test - bad2'),
844+
('endtag', 'a'),
843845
('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]),
844-
('data', 'test - bad3'), ('endtag', 'a'),
845-
('starttag', 'a', [('href', None), ('=', None), ("test'&nbsp;style", 'color:red;bad4')]),
846-
('data', 'test - bad4'), ('endtag', 'a')
846+
('data', 'test - bad3'),
847+
('endtag', 'a'),
848+
('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]),
849+
('data', 'test - bad4'),
850+
('endtag', 'a'),
847851
]
848852
self._run_check(html, expected)
849853

Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,3 @@ according to the HTML5 standard.
1818

1919
* Multiple ``=`` between attribute name and value are no longer collapsed.
2020
E.g. ``<a foo==bar>`` produces attribute "foo" with value "=bar".
21-
22-
* Whitespaces between the ``=`` separator and attribute name or value are no
23-
longer ignored. E.g. ``<a foo =bar>`` produces two attributes "foo" and
24-
"=bar", both with value None; ``<a foo= bar>`` produces two attributes:
25-
"foo" with value "" and "bar" with value None.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy