Skip to content

Commit d0e8c10

Browse files
[3.11] gh-67693: Fix urlunparse() and urlunsplit() for URIs with path starting with multiple slashes and no authority (GH-113563) (#119025)
(cherry picked from commit e237b25)
1 parent f7c0f09 commit d0e8c10

File tree

3 files changed

+70
-4
lines changed

3 files changed

+70
-4
lines changed

Lib/test/test_urlparse.py

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,17 @@
103103

104104
class UrlParseTestCase(unittest.TestCase):
105105

106-
def checkRoundtrips(self, url, parsed, split):
106+
def checkRoundtrips(self, url, parsed, split, url2=None):
107+
if url2 is None:
108+
url2 = url
107109
result = urllib.parse.urlparse(url)
108110
self.assertEqual(result, parsed)
109111
t = (result.scheme, result.netloc, result.path,
110112
result.params, result.query, result.fragment)
111113
self.assertEqual(t, parsed)
112114
# put it back together and it should be the same
113115
result2 = urllib.parse.urlunparse(result)
114-
self.assertEqual(result2, url)
116+
self.assertEqual(result2, url2)
115117
self.assertEqual(result2, result.geturl())
116118

117119
# the result of geturl() is a fixpoint; we can always parse it
@@ -137,7 +139,7 @@ def checkRoundtrips(self, url, parsed, split):
137139
result.query, result.fragment)
138140
self.assertEqual(t, split)
139141
result2 = urllib.parse.urlunsplit(result)
140-
self.assertEqual(result2, url)
142+
self.assertEqual(result2, url2)
141143
self.assertEqual(result2, result.geturl())
142144

143145
# check the fixpoint property of re-parsing the result of geturl()
@@ -175,9 +177,39 @@ def test_qs(self):
175177

176178
def test_roundtrips(self):
177179
str_cases = [
180+
('path/to/file',
181+
('', '', 'path/to/file', '', '', ''),
182+
('', '', 'path/to/file', '', '')),
183+
('/path/to/file',
184+
('', '', '/path/to/file', '', '', ''),
185+
('', '', '/path/to/file', '', '')),
186+
('//path/to/file',
187+
('', 'path', '/to/file', '', '', ''),
188+
('', 'path', '/to/file', '', '')),
189+
('////path/to/file',
190+
('', '', '//path/to/file', '', '', ''),
191+
('', '', '//path/to/file', '', '')),
192+
('scheme:path/to/file',
193+
('scheme', '', 'path/to/file', '', '', ''),
194+
('scheme', '', 'path/to/file', '', '')),
195+
('scheme:/path/to/file',
196+
('scheme', '', '/path/to/file', '', '', ''),
197+
('scheme', '', '/path/to/file', '', '')),
198+
('scheme://path/to/file',
199+
('scheme', 'path', '/to/file', '', '', ''),
200+
('scheme', 'path', '/to/file', '', '')),
201+
('scheme:////path/to/file',
202+
('scheme', '', '//path/to/file', '', '', ''),
203+
('scheme', '', '//path/to/file', '', '')),
178204
('file:///tmp/junk.txt',
179205
('file', '', '/tmp/junk.txt', '', '', ''),
180206
('file', '', '/tmp/junk.txt', '', '')),
207+
('file:////tmp/junk.txt',
208+
('file', '', '//tmp/junk.txt', '', '', ''),
209+
('file', '', '//tmp/junk.txt', '', '')),
210+
('file://///tmp/junk.txt',
211+
('file', '', '///tmp/junk.txt', '', '', ''),
212+
('file', '', '///tmp/junk.txt', '', '')),
181213
('imap://mail.python.org/mbox1',
182214
('imap', 'mail.python.org', '/mbox1', '', '', ''),
183215
('imap', 'mail.python.org', '/mbox1', '', '')),
@@ -208,6 +240,38 @@ def _encode(t):
208240
for url, parsed, split in str_cases + bytes_cases:
209241
self.checkRoundtrips(url, parsed, split)
210242

243+
def test_roundtrips_normalization(self):
244+
str_cases = [
245+
('///path/to/file',
246+
'/path/to/file',
247+
('', '', '/path/to/file', '', '', ''),
248+
('', '', '/path/to/file', '', '')),
249+
('scheme:///path/to/file',
250+
'scheme:/path/to/file',
251+
('scheme', '', '/path/to/file', '', '', ''),
252+
('scheme', '', '/path/to/file', '', '')),
253+
('file:/tmp/junk.txt',
254+
'file:///tmp/junk.txt',
255+
('file', '', '/tmp/junk.txt', '', '', ''),
256+
('file', '', '/tmp/junk.txt', '', '')),
257+
('http:/tmp/junk.txt',
258+
'http:///tmp/junk.txt',
259+
('http', '', '/tmp/junk.txt', '', '', ''),
260+
('http', '', '/tmp/junk.txt', '', '')),
261+
('https:/tmp/junk.txt',
262+
'https:///tmp/junk.txt',
263+
('https', '', '/tmp/junk.txt', '', '', ''),
264+
('https', '', '/tmp/junk.txt', '', '')),
265+
]
266+
def _encode(t):
267+
return (t[0].encode('ascii'),
268+
t[1].encode('ascii'),
269+
tuple(x.encode('ascii') for x in t[2]),
270+
tuple(x.encode('ascii') for x in t[3]))
271+
bytes_cases = [_encode(x) for x in str_cases]
272+
for url, url2, parsed, split in str_cases + bytes_cases:
273+
self.checkRoundtrips(url, parsed, split, url2)
274+
211275
def test_http_roundtrips(self):
212276
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
213277
# so we test both 'http:' and 'https:' in all the following.

Lib/urllib/parse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,7 @@ def urlunsplit(components):
525525
empty query; the RFC states that these are equivalent)."""
526526
scheme, netloc, url, query, fragment, _coerce_result = (
527527
_coerce_args(*components))
528-
if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
528+
if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
529529
if url and url[:1] != '/': url = '/' + url
530530
url = '//' + (netloc or '') + url
531531
if scheme:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs that have no authority and whose path starts with multiple slashes.
2+
Based on patch by Ashwin Ramaswami.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy