Skip to content

Commit 0a87812

Browse files
[3.8] gh-67693: Fix urlunparse() and urlunsplit() for URIs with path starting with multiple slashes and no authority (GH-113563) (#119028)
(cherry picked from commit e237b25)
Co-authored-by: Łukasz Langa <lukasz@langa.pl>
1 parent 7d1f50c commit 0a87812

File tree

3 files changed

+70
-4
lines changed

3 files changed

+70
-4
lines changed

Lib/test/test_urlparse.py

Lines changed: 67 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -70,15 +70,17 @@
7070

7171
class UrlParseTestCase(unittest.TestCase):
7272

73-
def checkRoundtrips(self, url, parsed, split):
73+
def checkRoundtrips(self, url, parsed, split, url2=None):
74+
if url2 is None:
75+
url2 = url
7476
result = urllib.parse.urlparse(url)
7577
self.assertEqual(result, parsed)
7678
t = (result.scheme, result.netloc, result.path,
7779
result.params, result.query, result.fragment)
7880
self.assertEqual(t, parsed)
7981
# put it back together and it should be the same
8082
result2 = urllib.parse.urlunparse(result)
81-
self.assertEqual(result2, url)
83+
self.assertEqual(result2, url2)
8284
self.assertEqual(result2, result.geturl())
8385

8486
# the result of geturl() is a fixpoint; we can always parse it
@@ -104,7 +106,7 @@ def checkRoundtrips(self, url, parsed, split):
104106
result.query, result.fragment)
105107
self.assertEqual(t, split)
106108
result2 = urllib.parse.urlunsplit(result)
107-
self.assertEqual(result2, url)
109+
self.assertEqual(result2, url2)
108110
self.assertEqual(result2, result.geturl())
109111

110112
# check the fixpoint property of re-parsing the result of geturl()
@@ -142,9 +144,39 @@ def test_qs(self):
142144

143145
def test_roundtrips(self):
144146
str_cases = [
147+
('path/to/file',
148+
('', '', 'path/to/file', '', '', ''),
149+
('', '', 'path/to/file', '', '')),
150+
('/path/to/file',
151+
('', '', '/path/to/file', '', '', ''),
152+
('', '', '/path/to/file', '', '')),
153+
('//path/to/file',
154+
('', 'path', '/to/file', '', '', ''),
155+
('', 'path', '/to/file', '', '')),
156+
('////path/to/file',
157+
('', '', '//path/to/file', '', '', ''),
158+
('', '', '//path/to/file', '', '')),
159+
('scheme:path/to/file',
160+
('scheme', '', 'path/to/file', '', '', ''),
161+
('scheme', '', 'path/to/file', '', '')),
162+
('scheme:/path/to/file',
163+
('scheme', '', '/path/to/file', '', '', ''),
164+
('scheme', '', '/path/to/file', '', '')),
165+
('scheme://path/to/file',
166+
('scheme', 'path', '/to/file', '', '', ''),
167+
('scheme', 'path', '/to/file', '', '')),
168+
('scheme:////path/to/file',
169+
('scheme', '', '//path/to/file', '', '', ''),
170+
('scheme', '', '//path/to/file', '', '')),
145171
('file:///tmp/junk.txt',
146172
('file', '', '/tmp/junk.txt', '', '', ''),
147173
('file', '', '/tmp/junk.txt', '', '')),
174+
('file:////tmp/junk.txt',
175+
('file', '', '//tmp/junk.txt', '', '', ''),
176+
('file', '', '//tmp/junk.txt', '', '')),
177+
('file://///tmp/junk.txt',
178+
('file', '', '///tmp/junk.txt', '', '', ''),
179+
('file', '', '///tmp/junk.txt', '', '')),
148180
('imap://mail.python.org/mbox1',
149181
('imap', 'mail.python.org', '/mbox1', '', '', ''),
150182
('imap', 'mail.python.org', '/mbox1', '', '')),
@@ -175,6 +207,38 @@ def _encode(t):
175207
for url, parsed, split in str_cases + bytes_cases:
176208
self.checkRoundtrips(url, parsed, split)
177209

210+
def test_roundtrips_normalization(self):
211+
str_cases = [
212+
('///path/to/file',
213+
'/path/to/file',
214+
('', '', '/path/to/file', '', '', ''),
215+
('', '', '/path/to/file', '', '')),
216+
('scheme:///path/to/file',
217+
'scheme:/path/to/file',
218+
('scheme', '', '/path/to/file', '', '', ''),
219+
('scheme', '', '/path/to/file', '', '')),
220+
('file:/tmp/junk.txt',
221+
'file:///tmp/junk.txt',
222+
('file', '', '/tmp/junk.txt', '', '', ''),
223+
('file', '', '/tmp/junk.txt', '', '')),
224+
('http:/tmp/junk.txt',
225+
'http:///tmp/junk.txt',
226+
('http', '', '/tmp/junk.txt', '', '', ''),
227+
('http', '', '/tmp/junk.txt', '', '')),
228+
('https:/tmp/junk.txt',
229+
'https:///tmp/junk.txt',
230+
('https', '', '/tmp/junk.txt', '', '', ''),
231+
('https', '', '/tmp/junk.txt', '', '')),
232+
]
233+
def _encode(t):
234+
return (t[0].encode('ascii'),
235+
t[1].encode('ascii'),
236+
tuple(x.encode('ascii') for x in t[2]),
237+
tuple(x.encode('ascii') for x in t[3]))
238+
bytes_cases = [_encode(x) for x in str_cases]
239+
for url, url2, parsed, split in str_cases + bytes_cases:
240+
self.checkRoundtrips(url, parsed, split, url2)
241+
178242
def test_http_roundtrips(self):
179243
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
180244
# so we test both 'http:' and 'https:' in all the following.

Lib/urllib/parse.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -512,7 +512,7 @@ def urlunsplit(components):
512512
empty query; the RFC states that these are equivalent)."""
513513
scheme, netloc, url, query, fragment, _coerce_result = (
514514
_coerce_args(*components))
515-
if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
515+
if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
516516
if url and url[:1] != '/': url = '/' + url
517517
url = '//' + (netloc or '') + url
518518
if scheme:
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs that have no authority and whose path starts with multiple slashes.
2+
Based on patch by Ashwin Ramaswami.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy