Skip to content

Commit 20d8ca8

Browse files
maskri17copybara-github
authored andcommitted
Aligning replace function in Regex ext with Google SQL
PiperOrigin-RevId: 773880708
1 parent 6d92e81 commit 20d8ca8

File tree

3 files changed

+122
-43
lines changed

3 files changed

+122
-43
lines changed

extensions/src/main/java/dev/cel/extensions/CelRegexExtensions.java

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ enum Function {
7878
String pattern = (String) args[1];
7979
String replaceStr = (String) args[2];
8080
long count = (long) args[3];
81-
return CelRegexExtensions.replace(target, pattern, replaceStr, count);
81+
return CelRegexExtensions.replaceN(target, pattern, replaceStr, count);
8282
}))),
8383
EXTRACT(
8484
CelFunctionDecl.newFunctionDeclaration(
@@ -153,18 +153,20 @@ private static Pattern compileRegexPattern(String regex) {
153153
}
154154

155155
private static String replace(String target, String regex, String replaceStr) {
156-
Pattern pattern = compileRegexPattern(regex);
157-
Matcher matcher = pattern.matcher(target);
158-
return matcher.replaceAll(replaceStr);
156+
return replaceN(target, regex, replaceStr, -1);
159157
}
160158

161-
private static String replace(String target, String regex, String replaceStr, long replaceCount) {
162-
Pattern pattern = compileRegexPattern(regex);
163-
159+
private static String replaceN(
160+
String target, String regex, String replaceStr, long replaceCount) {
164161
if (replaceCount == 0) {
165162
return target;
166163
}
164+
// For all negative replaceCount, do a replaceAll
165+
if (replaceCount < 0) {
166+
replaceCount = -1;
167+
}
167168

169+
Pattern pattern = compileRegexPattern(regex);
168170
Matcher matcher = pattern.matcher(target);
169171
StringBuffer sb = new StringBuffer();
170172
int counter = 0;
@@ -173,14 +175,59 @@ private static String replace(String target, String regex, String replaceStr, lo
173175
if (replaceCount != -1 && counter >= replaceCount) {
174176
break;
175177
}
176-
matcher.appendReplacement(sb, replaceStr);
178+
179+
String processedReplacement = replaceStrValidator(matcher, replaceStr);
180+
matcher.appendReplacement(sb, Matcher.quoteReplacement(processedReplacement));
177181
counter++;
178182
}
179183
matcher.appendTail(sb);
180184

181185
return sb.toString();
182186
}
183187

188+
private static String replaceStrValidator(Matcher matcher, String replacement) {
189+
StringBuilder sb = new StringBuilder();
190+
for (int i = 0; i < replacement.length(); i++) {
191+
char c = replacement.charAt(i);
192+
193+
if (c != '\\') {
194+
sb.append(c);
195+
continue;
196+
}
197+
198+
if (i + 1 >= replacement.length()) {
199+
throw new IllegalArgumentException("Invalid replacement string: \\ not allowed at end");
200+
}
201+
202+
char nextChar = replacement.charAt(++i);
203+
204+
if (Character.isDigit(nextChar)) {
205+
int groupNum = Character.digit(nextChar, 10);
206+
int groupCount = matcher.groupCount();
207+
208+
if (groupNum > groupCount) {
209+
throw new IllegalArgumentException(
210+
"Replacement string references group "
211+
+ groupNum
212+
+ " but regex has only "
213+
+ groupCount
214+
+ " group(s)");
215+
}
216+
217+
String groupValue = matcher.group(groupNum);
218+
if (groupValue != null) {
219+
sb.append(groupValue);
220+
}
221+
} else if (nextChar == '\\') {
222+
sb.append('\\');
223+
} else {
224+
throw new IllegalArgumentException(
225+
"Invalid replacement string: \\ must be followed by a digit");
226+
}
227+
}
228+
return sb.toString();
229+
}
230+
184231
private static Optional<String> extract(String target, String regex) {
185232
Pattern pattern = compileRegexPattern(regex);
186233
Matcher matcher = pattern.matcher(target);
@@ -215,11 +262,12 @@ private static ImmutableList<String> extractAll(String target, String regex) {
215262
while (matcher.find()) {
216263
if (hasOneGroup) {
217264
String group = matcher.group(1);
218-
// Add the captured group's content only if it's not null (e.g. optional group didn't match)
265+
// Add the captured group's content only if it's not null
219266
if (group != null) {
220267
builder.add(group);
221268
}
222-
} else { // No capturing groups (matcher.groupCount() == 0)
269+
} else {
270+
// No capturing groups
223271
builder.add(matcher.group(0));
224272
}
225273
}

extensions/src/main/java/dev/cel/extensions/README.md

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -717,12 +717,15 @@ chance for collision.
717717

718718
### Replace
719719

720-
The `regex.replace` function replaces all occurrences of a regex pattern in a
721-
string with a replacement string. Optionally, you can limit the number of
722-
replacements by providing a count argument. Both numeric ($N) and named
723-
(${name}) capture group references are supported in the replacement string, with
724-
validation for correctness. An error will be thrown for invalid regex or replace
725-
string.
720+
The `regex.replace` function replaces all non-overlapping matches of a regex
721+
pattern in the target string with a replacement string. Optionally, you can
722+
limit the number of replacements by providing a count argument. When the count
723+
is a negative number, the function acts as replace all. Only numeric (\N)
724+
capture group references are supported in the replacement string, with
725+
validation for correctness. Backslash-escaped digits (\1 to \9) within the
726+
replacement argument can be used to insert text matching the corresponding
727+
parenthesized group in the regexp pattern. An error will be thrown for invalid
728+
regex or replace string.
726729

727730
```
728731
regex.replace(target: string, pattern: string, replacement: string) -> string
@@ -732,14 +735,16 @@ regex.replace(target: string, pattern: string, replacement: string, count: int)
732735
Examples:
733736

734737
```
738+
regex.replace('hello world hello', 'hello', 'hi') == 'hi world hi'
735739
regex.replace('banana', 'a', 'x', 0) == 'banana'
736740
regex.replace('banana', 'a', 'x', 1) == 'bxnana'
737741
regex.replace('banana', 'a', 'x', 2) == 'bxnxna'
738-
regex.replace('foo bar', '(fo)o (ba)r', '$2 $1') == 'ba fo'
742+
regex.replace('banana', 'a', 'x', -12) == 'bxnxnx'
743+
regex.replace('foo bar', '(fo)o (ba)r', '\\2 \\1') == 'ba fo'
739744
740745
regex.replace('test', '(.)', '\\2') \\ Runtime Error invalid replace string
741746
regex.replace('foo bar', '(', '$2 $1') \\ Runtime Error invalid regex string
742-
regex.replace('id=123', 'id=(?P<value>\\\\d+)', 'value: ${values}') \\ Runtime Error invalid replace string
747+
regex.replace('id=123', 'id=(?P<value>\\\\d+)', 'value: \\values') \\ Runtime Error invalid replace string
743748
744749
```
745750

extensions/src/test/java/dev/cel/extensions/CelRegexExtensionsTest.java

Lines changed: 51 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -39,23 +39,31 @@ public final class CelRegexExtensionsTest {
3939
CelRuntimeFactory.standardCelRuntimeBuilder().addLibraries(CelExtensions.regex()).build();
4040

4141
@Test
42-
@TestParameters("{target: 'foo bar', regex: '(fo)o (ba)r', replaceStr: '$2 $1', res: 'ba fo'}")
42+
@TestParameters("{target: 'abc', regex: '^', replaceStr: 'start_', res: 'start_abc'}")
43+
@TestParameters("{target: 'abc', regex: '$', replaceStr: '_end', res: 'abc_end'}")
44+
@TestParameters("{target: 'a-b', regex: '\\\\b', replaceStr: '|', res: '|a|-|b|'}")
45+
@TestParameters(
46+
"{target: 'foo bar', regex: '(fo)o (ba)r', replaceStr: '\\\\2 \\\\1', res: 'ba fo'}")
47+
@TestParameters("{target: 'foo bar', regex: 'foo', replaceStr: '\\\\\\\\', res: '\\ bar'}")
4348
@TestParameters("{target: 'banana', regex: 'ana', replaceStr: 'x', res: 'bxna'}")
44-
@TestParameters("{target: 'abc', regex: 'b(.)', replaceStr: 'x$1', res: 'axc'}")
49+
@TestParameters("{target: 'abc', regex: 'b(.)', replaceStr: 'x\\\\1', res: 'axc'}")
4550
@TestParameters(
4651
"{target: 'hello world hello', regex: 'hello', replaceStr: 'hi', res: 'hi world hi'}")
52+
@TestParameters("{target: 'ac', regex: 'a(b)?c', replaceStr: '[\\\\1]', res: '[]'}")
4753
@TestParameters("{target: 'apple pie', regex: 'p', replaceStr: 'X', res: 'aXXle Xie'}")
4854
@TestParameters(
4955
"{target: 'remove all spaces', regex: '\\\\s', replaceStr: '', res: 'removeallspaces'}")
5056
@TestParameters("{target: 'digit:99919291992', regex: '\\\\d+', replaceStr: '3', res: 'digit:3'}")
5157
@TestParameters(
52-
"{target: 'foo bar baz', regex: '\\\\w+', replaceStr: '($0)', res: '(foo) (bar) (baz)'}")
58+
"{target: 'foo bar baz', regex: '\\\\w+', replaceStr: '(\\\\0)', res: '(foo) (bar) (baz)'}")
5359
@TestParameters("{target: '', regex: 'a', replaceStr: 'b', res: ''}")
5460
@TestParameters(
5561
"{target: 'User: Alice, Age: 30', regex: 'User: (?P<name>\\\\w+), Age: (?P<age>\\\\d+)',"
56-
+ " replaceStr: '${name} is ${age} years old', res: 'Alice is 30 years old'}")
62+
+ " replaceStr: '${name} is ${age} years old', res: '${name} is ${age} years old'}")
5763
@TestParameters(
58-
"{target: 'abc', regex: '(?P<letter>b)', replaceStr: '[${letter}]', res: 'a[b]c'}")
64+
"{target: 'User: Alice, Age: 30', regex: 'User: (?P<name>\\\\w+), Age: (?P<age>\\\\d+)',"
65+
+ " replaceStr: '\\\\1 is \\\\2 years old', res: 'Alice is 30 years old'}")
66+
@TestParameters("{target: 'hello ☃', regex: '☃', replaceStr: '❄', res: 'hello ❄'}")
5967
public void replaceAll_success(String target, String regex, String replaceStr, String res)
6068
throws Exception {
6169
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
@@ -70,8 +78,8 @@ public void replaceAll_success(String target, String regex, String replaceStr, S
7078
public void replace_nested_success() throws Exception {
7179
String expr =
7280
"regex.replace("
73-
+ " regex.replace('%(foo) %(bar) %2','%\\\\((\\\\w+)\\\\)','\\\\${$1}'),"
74-
+ " '%(\\\\d+)', '\\\\$$1')";
81+
+ " regex.replace('%(foo) %(bar) %2','%\\\\((\\\\w+)\\\\)','${\\\\1}'),"
82+
+ " '%(\\\\d+)', '$\\\\1')";
7583
CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile(expr).getAst());
7684

7785
Object result = program.eval();
@@ -85,19 +93,18 @@ public void replace_nested_success() throws Exception {
8593
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: 2, res: 'bxnxna'}")
8694
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: 100, res: 'bxnxnx'}")
8795
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: -1, res: 'bxnxnx'}")
88-
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: -100, res: 'banana'}")
96+
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: -100, res: 'bxnxnx'}")
8997
@TestParameters(
90-
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '$2-$1', i: 1,"
98+
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '\\\\2-\\\\1', i: 1,"
9199
+ " res: 'dog-cat dog-cat cat-dog dog-cat'}")
92100
@TestParameters(
93-
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '$2-$1', i: 2, res: 'dog-cat"
94-
+ " dog-cat dog-cat dog-cat'}")
101+
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '\\\\2-\\\\1', i: 2, res:"
102+
+ " 'dog-cat dog-cat dog-cat dog-cat'}")
95103
@TestParameters("{t: 'a.b.c', re: '\\\\.', rep: '-', i: 1, res: 'a-b.c'}")
96104
@TestParameters("{t: 'a.b.c', re: '\\\\.', rep: '-', i: -1, res: 'a-b-c'}")
97105
public void replaceCount_success(String t, String re, String rep, long i, String res)
98106
throws Exception {
99107
String expr = String.format("regex.replace('%s', '%s', '%s', %d)", t, re, rep, i);
100-
System.out.println("expr: " + expr);
101108
CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile(expr).getAst());
102109

103110
Object result = program.eval();
@@ -108,7 +115,7 @@ public void replaceCount_success(String t, String re, String rep, long i, String
108115
@Test
109116
@TestParameters("{target: 'foo bar', regex: '(', replaceStr: '$2 $1'}")
110117
@TestParameters("{target: 'foo bar', regex: '[a-z', replaceStr: '$2 $1'}")
111-
public void replace_invalid_regex(String target, String regex, String replaceStr)
118+
public void replace_invalidRegex_throwsException(String target, String regex, String replaceStr)
112119
throws Exception {
113120
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
114121
CelAbstractSyntaxTree ast = COMPILER.compile(expr).getAst();
@@ -121,32 +128,48 @@ public void replace_invalid_regex(String target, String regex, String replaceStr
121128
}
122129

123130
@Test
124-
@TestParameters("{target: 'test', regex: '(.)', replaceStr: '$2'}")
125-
public void replace_invalid_captureGroup(String target, String regex, String replaceStr)
126-
throws Exception {
127-
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
131+
public void replace_invalidCaptureGroupReplaceStr_throwsException() throws Exception {
132+
String expr = "regex.replace('test', '(.)', '\\\\2')";
128133
CelAbstractSyntaxTree ast = COMPILER.compile(expr).getAst();
129134

130135
CelEvaluationException e =
131136
assertThrows(CelEvaluationException.class, () -> RUNTIME.createProgram(ast).eval());
132137

133-
assertThat(e).hasCauseThat().isInstanceOf(IndexOutOfBoundsException.class);
134-
assertThat(e).hasCauseThat().hasMessageThat().contains("n > number of groups");
138+
assertThat(e).hasCauseThat().isInstanceOf(IllegalArgumentException.class);
139+
assertThat(e)
140+
.hasCauseThat()
141+
.hasMessageThat()
142+
.contains("Replacement string references group 2 but regex has only 1 group(s)");
135143
}
136144

137145
@Test
138-
@TestParameters(
139-
"{target: 'id=123', regex: 'id=(?P<value>\\\\d+)', replaceStr: 'value: ${values}'}")
140-
public void replace_invalid_replaceStr(String target, String regex, String replaceStr)
141-
throws Exception {
142-
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
146+
public void replace_trailingBackslashReplaceStr_throwsException() throws Exception {
147+
String expr = "regex.replace('id=123', 'id=(?P<value>\\\\d+)', '\\\\')";
148+
CelAbstractSyntaxTree ast = COMPILER.compile(expr).getAst();
149+
150+
CelEvaluationException e =
151+
assertThrows(CelEvaluationException.class, () -> RUNTIME.createProgram(ast).eval());
152+
153+
assertThat(e).hasCauseThat().isInstanceOf(IllegalArgumentException.class);
154+
assertThat(e)
155+
.hasCauseThat()
156+
.hasMessageThat()
157+
.contains("Invalid replacement string: \\ not allowed at end");
158+
}
159+
160+
@Test
161+
public void replace_invalidGroupReferenceReplaceStr_throwsException() throws Exception {
162+
String expr = "regex.replace('id=123', 'id=(?P<value>\\\\d+)', '\\\\a')";
143163
CelAbstractSyntaxTree ast = COMPILER.compile(expr).getAst();
144164

145165
CelEvaluationException e =
146166
assertThrows(CelEvaluationException.class, () -> RUNTIME.createProgram(ast).eval());
147167

148168
assertThat(e).hasCauseThat().isInstanceOf(IllegalArgumentException.class);
149-
assertThat(e).hasCauseThat().hasMessageThat().contains("group 'values' not found");
169+
assertThat(e)
170+
.hasCauseThat()
171+
.hasMessageThat()
172+
.contains("Invalid replacement string: \\ must be followed by a digit");
150173
}
151174

152175
@Test
@@ -208,6 +231,9 @@ private enum ExtractAllTestCase {
208231
NO_MATCH("regex.extractAll('id:123, id:456', 'assa')", ImmutableList.of()),
209232
NO_CAPTURE_GROUP(
210233
"regex.extractAll('id:123, id:456', 'id:\\\\d+')", ImmutableList.of("id:123", "id:456")),
234+
CAPTURE_GROUP(
235+
"regex.extractAll('key=\"\", key=\"val\"', 'key=\"([^\"]*)\"')",
236+
ImmutableList.of("", "val")),
211237
SINGLE_NAMED_GROUP(
212238
"regex.extractAll('testuser@testdomain', '(?P<username>.*)@')",
213239
ImmutableList.of("testuser")),

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy