Skip to content

Commit ad695f5

Browse files
[3.12] gh-135661: Fix parsing attributes with whitespaces around the "=" separator in HTMLParser (GH-136908) (GH-136919)
This fixes a regression introduced in GH-135930.

(cherry picked from commit dee6501)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent ef053a9 commit ad695f5

File tree

3 files changed: 18 additions (+18) and 19 deletions (-19).

Lib/html/parser.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@
4343
(
4444
(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
4545
)
46-
(= # value indicator
46+
([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
4747
('[^']*' # LITA-enclosed value
4848
|"[^"]*" # LIT-enclosed value
4949
|(?!['"])[^>\t\n\r\f ]* # bare value
@@ -55,7 +55,7 @@
5555
[a-zA-Z][^\t\n\r\f />]* # tag name
5656
[\t\n\r\f /]* # optional whitespace before attribute name
5757
(?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
58-
(?:= # value indicator
58+
(?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
5959
(?:'[^']*' # LITA-enclosed value
6060
|"[^"]*" # LIT-enclosed value
6161
|(?!['"])[^>\t\n\r\f ]* # bare value

Lib/test/test_htmlparser.py

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -596,7 +596,7 @@ def test_correct_detection_of_start_tags(self):
596596

597597
html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
598598
expected = [
599-
('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]),
599+
('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]),
600600
('starttag', 'b', []),
601601
('data', 'The '),
602602
('starttag', 'a', [('href', 'some_url')]),
@@ -752,12 +752,12 @@ def test_attr_syntax(self):
752752
]
753753
self._run_check("""<a b='v' c="v" d=v e>""", output)
754754
self._run_check("<a foo==bar>", [('starttag', 'a', [('foo', '=bar')])])
755-
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
756-
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
755+
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', 'bar')])])
756+
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', 'bar')])])
757757
self._run_check("<a foo\v=bar>", [('starttag', 'a', [('foo\v', 'bar')])])
758758
self._run_check("<a foo\xa0=bar>", [('starttag', 'a', [('foo\xa0', 'bar')])])
759-
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
760-
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
759+
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', 'bar')])])
760+
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', 'bar')])])
761761
self._run_check("<a foo=\vbar>", [('starttag', 'a', [('foo', '\vbar')])])
762762
self._run_check("<a foo=\xa0bar>", [('starttag', 'a', [('foo', '\xa0bar')])])
763763

@@ -768,8 +768,8 @@ def test_attr_values(self):
768768
("d", "\txyz\n")])])
769769
self._run_check("""<a b='' c="">""",
770770
[("starttag", "a", [("b", ""), ("c", "")])])
771-
self._run_check("<a b=\t c=\n>",
772-
[("starttag", "a", [("b", ""), ("c", "")])])
771+
self._run_check("<a b=\tx c=\ny>",
772+
[('starttag', 'a', [('b', 'x'), ('c', 'y')])])
773773
self._run_check("<a b=\v c=\xa0>",
774774
[("starttag", "a", [("b", "\v"), ("c", "\xa0")])])
775775
# Regression test for SF patch #669683.
@@ -838,13 +838,17 @@ def test_malformed_attributes(self):
838838
)
839839
expected = [
840840
('starttag', 'a', [('href', "test'style='color:red;bad1'")]),
841-
('data', 'test - bad1'), ('endtag', 'a'),
841+
('data', 'test - bad1'),
842+
('endtag', 'a'),
842843
('starttag', 'a', [('href', "test'+style='color:red;ba2'")]),
843-
('data', 'test - bad2'), ('endtag', 'a'),
844+
('data', 'test - bad2'),
845+
('endtag', 'a'),
844846
('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]),
845-
('data', 'test - bad3'), ('endtag', 'a'),
846-
('starttag', 'a', [('href', None), ('=', None), ("test'&nbsp;style", 'color:red;bad4')]),
847-
('data', 'test - bad4'), ('endtag', 'a')
847+
('data', 'test - bad3'),
848+
('endtag', 'a'),
849+
('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]),
850+
('data', 'test - bad4'),
851+
('endtag', 'a'),
848852
]
849853
self._run_check(html, expected)
850854

Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,3 @@ according to the HTML5 standard.
1818

1919
* Multiple ``=`` between attribute name and value are no longer collapsed.
2020
E.g. ``<a foo==bar>`` produces attribute "foo" with value "=bar".
21-
22-
* Whitespaces between the ``=`` separator and attribute name or value are no
23-
longer ignored. E.g. ``<a foo =bar>`` produces two attributes "foo" and
24-
"=bar", both with value None; ``<a foo= bar>`` produces two attributes:
25-
"foo" with value "" and "bar" with value None.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy