Skip to content

Commit 72de5b3

Browse files
authored
chore: migrate to own glob parser (microsoft#2230)
1 parent 73616f4 commit 72de5b3

File tree

7 files changed

+123
-14
lines changed

7 files changed

+123
-14
lines changed

playwright/_impl/_glob.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright (c) Microsoft Corporation.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import re
15+
16+
# Characters that carry special meaning in a regular expression and therefore
# need a leading backslash when they must match literally. See:
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#escaping
escaped_chars = {"$", "^", "+", ".", "*", "(", ")", "|", "\\", "?", "{", "}", "[", "]"}


def glob_to_regex(glob: str) -> "re.Pattern[str]":
    """Translate a glob pattern into a compiled, fully anchored regex.

    Translation rules, applied left to right:

    * A backslash followed by another character emits that character
      literally (regex-escaped when it is listed in ``escaped_chars``).
    * A run of two or more ``*`` that is bounded on each side by ``/`` or by
      an end of the pattern becomes ``((?:[^/]*(?:/|$))*)`` and also consumes
      the character that follows the run.
    * Any other run of ``*`` becomes ``([^/]*)``.
    * ``?`` becomes ``.``; ``[`` and ``]`` are copied through unchanged.
    * ``{`` opens a regex group, ``}`` closes it, and ``,`` becomes ``|``
      while a group is open.
    * Every other character is copied, regex-escaped when it is listed in
      ``escaped_chars``.

    Args:
        glob: The glob pattern to translate.

    Returns:
        The pattern compiled with ``re.compile``, wrapped in ``^`` and ``$``.
    """
    length = len(glob)
    parts = ["^"]
    inside_braces = False
    pos = 0

    while pos < length:
        ch = glob[pos]

        if ch == "\\" and pos + 1 < length:
            # Escape sequence: both the backslash and its target are consumed.
            target = glob[pos + 1]
            parts.append("\\" + target if target in escaped_chars else target)
            pos += 2
            continue

        if ch == "*":
            preceding = glob[pos - 1] if pos > 0 else None
            # Advance to the last star of the consecutive run.
            run_end = pos
            while run_end + 1 < length and glob[run_end + 1] == "*":
                run_end += 1
            following = glob[run_end + 1] if run_end + 1 < length else None

            if (
                run_end > pos
                and preceding in ("/", None)
                and following in ("/", None)
            ):
                parts.append("((?:[^/]*(?:/|$))*)")
                # Skip the run and the character right after it.
                pos = run_end + 2
            else:
                parts.append("([^/]*)")
                pos = run_end + 1
            continue

        if ch == "?":
            parts.append(".")
        elif ch == "[" or ch == "]":
            parts.append(ch)
        elif ch == "{":
            inside_braces = True
            parts.append("(")
        elif ch == "}":
            inside_braces = False
            parts.append(")")
        elif ch == "," and inside_braces:
            parts.append("|")
        elif ch in escaped_chars:
            parts.append("\\" + ch)
        else:
            parts.append(ch)
        pos += 1

    parts.append("$")
    return re.compile("".join(parts))

playwright/_impl/_helper.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
import asyncio
15-
import fnmatch
1615
import inspect
1716
import math
1817
import os
@@ -41,6 +40,7 @@
4140

4241
from playwright._impl._api_structures import NameValue
4342
from playwright._impl._errors import Error, TargetClosedError, TimeoutError
43+
from playwright._impl._glob import glob_to_regex
4444
from playwright._impl._str_utils import escape_regex_flags
4545

4646
if sys.version_info >= (3, 8): # pragma: no cover
@@ -149,7 +149,7 @@ def __init__(self, base_url: Union[str, None], match: URLMatch) -> None:
149149
if isinstance(match, str):
150150
if base_url and not match.startswith("*"):
151151
match = urljoin(base_url, match)
152-
regex = fnmatch.translate(match)
152+
regex = glob_to_regex(match)
153153
self._regex_obj = re.compile(regex)
154154
elif isinstance(match, Pattern):
155155
self._regex_obj = match

tests/async/test_browsercontext_request_fallback.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,10 +185,9 @@ async def handler_with_header_mods(route: Route) -> None:
185185
await context.route("**/*", handler_with_header_mods)
186186

187187
await page.goto(server.EMPTY_PAGE)
188-
async with page.expect_request("/sleep.zzz") as request_info:
188+
with server.expect_request("/sleep.zzz") as server_request_info:
189189
await page.evaluate("() => fetch('/sleep.zzz')")
190-
request = await request_info.value
191-
values.append(request.headers.get("foo"))
190+
values.append(server_request_info.value.getHeader("foo"))
192191
assert values == ["bar", "bar", "bar"]
193192

194193

tests/async/test_interception.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import pytest
2222

23+
from playwright._impl._glob import glob_to_regex
2324
from playwright.async_api import (
2425
Browser,
2526
BrowserContext,
@@ -1041,3 +1042,47 @@ async def handle_request(route: Route) -> None:
10411042
assert response
10421043
assert response.status == 200
10431044
assert await response.json() == {"foo": "bar"}
1045+
1046+
1047+
async def test_glob_to_regex() -> None:
    """Check glob_to_regex against accepted URLs, rejected URLs and exact patterns."""
    # (glob, url) pairs where the translated pattern must match the URL.
    accepted = [
        ("**/*.js", "https://localhost:8080/foo.js"),
        ("https://**/*.js", "https://localhost:8080/foo.js"),
        (
            "http://localhost:8080/simple/path.js",
            "http://localhost:8080/simple/path.js",
        ),
        (
            "http://localhost:8080/?imple/path.js",
            "http://localhost:8080/Simple/path.js",
        ),
        ("**/{a,b}.js", "https://localhost:8080/a.js"),
        ("**/{a,b}.js", "https://localhost:8080/b.js"),
        ("**/*.{png,jpg,jpeg}", "https://localhost:8080/c.jpg"),
        ("**/*.{png,jpg,jpeg}", "https://localhost:8080/c.jpeg"),
        ("**/*.{png,jpg,jpeg}", "https://localhost:8080/c.png"),
        ("foo*", "foo.js"),
        (
            "http://localhost:3000/signin-oidc*",
            "http://localhost:3000/signin-oidcnice",
        ),
        (
            "**/three-columns/settings.html?**id=[a-z]**",
            "http://mydomain:8080/blah/blah/three-columns/settings.html?id=settings-e3c58efe-02e9-44b0-97ac-dd138100cf7c&blah",
        ),
    ]
    # (glob, url) pairs where the translated pattern must NOT match the URL.
    rejected = [
        ("**/*.css", "https://localhost:8080/foo.js"),
        ("*.js", "https://localhost:8080/foo.js"),
        ("**/{a,b}.js", "https://localhost:8080/c.js"),
        ("**/*.{png,jpg,jpeg}", "https://localhost:8080/c.css"),
        ("foo*", "foo/bar.js"),
        (
            "http://localhost:3000/signin-oidc*",
            "http://localhost:3000/signin-oidc/foo",
        ),
    ]
    # (glob, regex source) pairs pinning the exact compiled pattern.
    exact = [
        ("\\?", r"^\?$"),
        ("\\", r"^\\$"),
        ("\\\\", r"^\\$"),
        ("\\[", r"^\[$"),
        ("[a-z]", r"^[a-z]$"),
        ("$^+.\\*()|\\?\\{\\}\\[\\]", r"^\$\^\+\.\*\(\)\|\?\{\}\[\]$"),
    ]

    for pattern, url in accepted:
        assert glob_to_regex(pattern).match(url)

    for pattern, url in rejected:
        assert not glob_to_regex(pattern).match(url)

    for pattern, expected_source in exact:
        assert glob_to_regex(pattern) == re.compile(expected_source)

tests/async/test_page_request_fallback.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,9 @@ async def handler_with_header_mods(route: Route) -> None:
164164
await page.route("**/*", handler_with_header_mods)
165165

166166
await page.goto(server.EMPTY_PAGE)
167-
async with page.expect_request("/sleep.zzz") as request_info:
167+
with server.expect_request("/sleep.zzz") as server_request_info:
168168
await page.evaluate("() => fetch('/sleep.zzz')")
169-
request = await request_info.value
170-
values.append(request.headers.get("foo"))
169+
values.append(server_request_info.value.getHeader("foo"))
171170
assert values == ["bar", "bar", "bar"]
172171

173172

tests/sync/test_browsercontext_request_fallback.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,9 @@ def handler_with_header_mods(route: Route) -> None:
174174
context.route("**/*", handler_with_header_mods)
175175

176176
page.goto(server.EMPTY_PAGE)
177-
with page.expect_request("/sleep.zzz") as request_info:
177+
with server.expect_request("/sleep.zzz") as server_request_info:
178178
page.evaluate("() => fetch('/sleep.zzz')")
179-
request = request_info.value
180-
values.append(request.headers.get("foo"))
179+
values.append(server_request_info.value.getHeader("foo"))
181180
assert values == ["bar", "bar", "bar"]
182181

183182

tests/sync/test_page_request_fallback.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,9 @@ def handler_with_header_mods(route: Route) -> None:
162162
page.route("**/*", handler_with_header_mods)
163163

164164
page.goto(server.EMPTY_PAGE)
165-
with page.expect_request("/sleep.zzz") as request_info:
165+
with server.expect_request("/sleep.zzz") as server_request_info:
166166
page.evaluate("() => fetch('/sleep.zzz')")
167-
request = request_info.value
168-
_append_with_return_value(values, request.headers.get("foo"))
167+
_append_with_return_value(values, server_request_info.value.getHeader("foo"))
169168
assert values == ["bar", "bar", "bar"]
170169

171170

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy