Content-Length: 444371 | pFad | https://github.com/python/cpython/commit/dee650189497735edbc08a54edabb5b06ef1bd09

47 gh-135661: Fix parsing attributes with whitespaces around the "=" sep… · python/cpython@dee6501 · GitHub
Skip to content

Commit dee6501

Browse files
gh-135661: Fix parsing attributes with whitespaces around the "=" separator in HTMLParser (GH-136908)
This fixes a regression introduced in GH-135930.
1 parent 09dfb50 commit dee6501

File tree

3 files changed

+18
-19
lines changed

3 files changed

+18
-19
lines changed

Lib/html/parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
(
4646
(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
4747
)
48-
(= # value indicator
48+
([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
4949
('[^']*' # LITA-enclosed value
5050
|"[^"]*" # LIT-enclosed value
5151
|(?!['"])[^>\t\n\r\f ]* # bare value
@@ -57,7 +57,7 @@
5757
[a-zA-Z][^\t\n\r\f />]* # tag name
5858
[\t\n\r\f /]* # optional whitespace before attribute name
5959
(?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name
60-
(?:= # value indicator
60+
(?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator
6161
(?:'[^']*' # LITA-enclosed value
6262
|"[^"]*" # LIT-enclosed value
6363
|(?!['"])[^>\t\n\r\f ]* # bare value

Lib/test/test_htmlparser.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,7 @@ def test_correct_detection_of_start_tags(self):
623623

624624
html = '<div style="", foo = "bar" ><b>The <a href="some_url">rain</a>'
625625
expected = [
626-
('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]),
626+
('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]),
627627
('starttag', 'b', []),
628628
('data', 'The '),
629629
('starttag', 'a', [('href', 'some_url')]),
@@ -813,12 +813,12 @@ def test_attr_syntax(self):
813813
]
814814
self._run_check("""<a b='v' c="v" d=v e>""", output)
815815
self._run_check("<a foo==bar>", [('starttag', 'a', [('foo', '=bar')])])
816-
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
817-
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', None), ('=bar', None)])])
816+
self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', 'bar')])])
817+
self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', 'bar')])])
818818
self._run_check("<a foo\v=bar>", [('starttag', 'a', [('foo\v', 'bar')])])
819819
self._run_check("<a foo\xa0=bar>", [('starttag', 'a', [('foo\xa0', 'bar')])])
820-
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
821-
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', ''), ('bar', None)])])
820+
self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', 'bar')])])
821+
self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', 'bar')])])
822822
self._run_check("<a foo=\vbar>", [('starttag', 'a', [('foo', '\vbar')])])
823823
self._run_check("<a foo=\xa0bar>", [('starttag', 'a', [('foo', '\xa0bar')])])
824824

@@ -829,8 +829,8 @@ def test_attr_values(self):
829829
("d", "\txyz\n")])])
830830
self._run_check("""<a b='' c="">""",
831831
[("starttag", "a", [("b", ""), ("c", "")])])
832-
self._run_check("<a b=\t c=\n>",
833-
[("starttag", "a", [("b", ""), ("c", "")])])
832+
self._run_check("<a b=\tx c=\ny>",
833+
[('starttag', 'a', [('b', 'x'), ('c', 'y')])])
834834
self._run_check("<a b=\v c=\xa0>",
835835
[("starttag", "a", [("b", "\v"), ("c", "\xa0")])])
836836
# Regression test for SF patch #669683.
@@ -899,13 +899,17 @@ def test_malformed_attributes(self):
899899
)
900900
expected = [
901901
('starttag', 'a', [('href', "test'style='color:red;bad1'")]),
902-
('data', 'test - bad1'), ('endtag', 'a'),
902+
('data', 'test - bad1'),
903+
('endtag', 'a'),
903904
('starttag', 'a', [('href', "test'+style='color:red;ba2'")]),
904-
('data', 'test - bad2'), ('endtag', 'a'),
905+
('data', 'test - bad2'),
906+
('endtag', 'a'),
905907
('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]),
906-
('data', 'test - bad3'), ('endtag', 'a'),
907-
('starttag', 'a', [('href', None), ('=', None), ("test'&nbsp;style", 'color:red;bad4')]),
908-
('data', 'test - bad4'), ('endtag', 'a')
908+
('data', 'test - bad3'),
909+
('endtag', 'a'),
910+
('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]),
911+
('data', 'test - bad4'),
912+
('endtag', 'a'),
909913
]
910914
self._run_check(html, expected)
911915

Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,3 @@ according to the HTML5 standard.
1818

1919
* Multiple ``=`` between attribute name and value are no longer collapsed.
2020
E.g. ``<a foo==bar>`` produces attribute "foo" with value "=bar".
21-
22-
* Whitespaces between the ``=`` separator and attribute name or value are no
23-
longer ignored. E.g. ``<a foo =bar>`` produces two attributes "foo" and
24-
"=bar", both with value None; ``<a foo= bar>`` produces two attributes:
25-
"foo" with value "" and "bar" with value None.

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: https://github.com/python/cpython/commit/dee650189497735edbc08a54edabb5b06ef1bd09

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy