Skip to content

Commit 6879f6a

Browse files
author
Greg Guthe
committed
1 parent 90cb80b commit 6879f6a

File tree

2 files changed: 25 additions and 2 deletions.

bleach/html5lib_shim.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -459,9 +459,22 @@ def convert_entity(value):
459459
if value[0] == "#":
460460
if len(value) < 2:
461461
return None
462+
462463
if value[1] in ("x", "X"):
463-
return six.unichr(int(value[2:], 16))
464-
return six.unichr(int(value[1:], 10))
464+
# hex-encoded code point
465+
int_as_string, base = value[2:], 16
466+
else:
467+
# decimal code point
468+
int_as_string, base = value[1:], 10
469+
470+
if int_as_string == "":
471+
return None
472+
473+
code_point = int(int_as_string, base)
474+
if 0 < code_point < 0x110000:
475+
return six.unichr(code_point)
476+
else:
477+
return None
465478

466479
return ENTITIES.get(value, None)
467480

tests/test_html5lib_shim.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,16 @@
1919
("&xx;", "&xx;"),
2020
# Handles multiple entities in the same string
2121
("this &amp; that &amp; that", "this & that & that"),
22+
# Handles empty decimal and hex encoded code points
23+
("&#x;", "&#x;"),
24+
("&#;", "&#;"),
25+
# Handles too high unicode points
26+
("&#x110000;", "&#x110000;"),
27+
("&#x110111;", "&#x110111;"),
28+
("&#9277809;", "&#9277809;"),
29+
# Handles negative unicode points
30+
("&#-1;", "&#-1;"),
31+
("&#x-1;", "&#x-1;"),
2232
],
2333
)
2434
def test_convert_entities(data, expected):

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad. © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy