Skip to content

Commit 782fcb4

Browse files
sylvinus authored and mvdan committed
regexp: add (*Regexp).SubexpIndex
SubexpIndex returns the index of the first subexpression with the given name, or -1 if there is no subexpression with that name.
Fixes #32420
Change-Id: Ie1f9d22d50fb84e18added80a9d9a9f6dca8ffc4
Reviewed-on: https://go-review.googlesource.com/c/go/+/187919
Run-TryBot: Ian Lance Taylor <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Daniel Martí <[email protected]>
1 parent 245409e commit 782fcb4

File tree

3 files changed

+59
-14
lines changed

3 files changed

+59
-14
lines changed

src/regexp/all_test.go

Lines changed: 28 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -418,24 +418,32 @@ func TestLiteralPrefix(t *testing.T) {
418418
}
419419
}
420420

421+
// subexpIndex pairs a subexpression name with the index SubexpIndex is expected to return for it.
type subexpIndex struct {
422+
name string // name passed to SubexpIndex
423+
index int // expected result; -1 when no subexpression has that name
424+
}
425+
421426
// subexpCase is one row of the subexpression test table: a pattern plus the
// subexpression metadata expected for it.
// NOTE(review): num and names look like the expected subexpression count and
// names; the checks that use them are outside this hunk, so confirm there.
type subexpCase struct {
422-
input string
423-
num int
424-
names []string
427+
input string // pattern under test; reported in failure messages
428+
num int
429+
names []string
430+
indices []subexpIndex // name lookups to try, each with the index SubexpIndex must return
425431
}
426432

433+
// emptySubexpIndices is the expectation shared by every case that has no named
// groups: neither the empty name nor an unknown name resolves to an index.
var emptySubexpIndices = []subexpIndex{{"", -1}, {"missing", -1}}
434+
427435
// subexpCases is the table of patterns and expected subexpression metadata
// used by the subexpression tests.
var subexpCases = []subexpCase{
428-
{``, 0, nil},
429-
{`.*`, 0, nil},
430-
{`abba`, 0, nil},
431-
{`ab(b)a`, 1, []string{"", ""}},
432-
{`ab(.*)a`, 1, []string{"", ""}},
433-
{`(.*)ab(.*)a`, 2, []string{"", "", ""}},
434-
{`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}},
435-
{`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}},
436-
{`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}},
437-
{`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}},
438-
{`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}},
436+
{``, 0, nil, emptySubexpIndices},
437+
{`.*`, 0, nil, emptySubexpIndices},
438+
{`abba`, 0, nil, emptySubexpIndices},
439+
{`ab(b)a`, 1, []string{"", ""}, emptySubexpIndices},
440+
{`ab(.*)a`, 1, []string{"", ""}, emptySubexpIndices},
441+
{`(.*)ab(.*)a`, 2, []string{"", "", ""}, emptySubexpIndices},
442+
{`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
443+
{`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}, emptySubexpIndices},
444+
{`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
445+
{`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
446+
// "foo" names both group 1 and group 4; the expected index is the leftmost one (1).
{`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}, []subexpIndex{{"", -1}, {"missing", -1}, {"foo", 1}, {"bar", 2}}},
439447
}
440448

441449
func TestSubexp(t *testing.T) {
@@ -458,6 +466,12 @@ func TestSubexp(t *testing.T) {
458466
}
459467
}
460468
}
469+
for _, subexp := range c.indices {
470+
index := re.SubexpIndex(subexp.name)
471+
if index != subexp.index {
472+
t.Errorf("%q: SubexpIndex(%q) = %d, want %d", c.input, subexp.name, index, subexp.index)
473+
}
474+
}
461475
}
462476
}
463477

src/regexp/example_test.go

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -280,6 +280,19 @@ func ExampleRegexp_SubexpNames() {
280280
// Turing Alan
281281
}
282282

283+
// ExampleRegexp_SubexpIndex looks up a named group's index and uses that index
// to read the group's text out of a submatch slice.
func ExampleRegexp_SubexpIndex() {
284+
re := regexp.MustCompile(`(?P<first>[a-zA-Z]+) (?P<last>[a-zA-Z]+)`)
285+
fmt.Println(re.MatchString("Alan Turing"))
286+
matches := re.FindStringSubmatch("Alan Turing")
287+
lastIndex := re.SubexpIndex("last") // resolve the group's position by name instead of hard-coding it
288+
fmt.Printf("last => %d\n", lastIndex)
289+
fmt.Println(matches[lastIndex])
290+
// Output:
291+
// true
292+
// last => 2
293+
// Turing
294+
}
295+
283296
func ExampleRegexp_Split() {
284297
a := regexp.MustCompile(`a`)
285298
fmt.Println(a.Split("banana", -1))

src/regexp/regexp.go

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -345,6 +345,24 @@ func (re *Regexp) SubexpNames() []string {
345345
return re.subexpNames
346346
}
347347

348+
// SubexpIndex returns the index of the first subexpression with the given name,
349+
// or -1 if there is no subexpression with that name.
350+
//
351+
// Note that multiple subexpressions can be written using the same name, as in
352+
// (?P<bob>a+)(?P<bob>b+), which declares two subexpressions named "bob".
353+
// In this case, SubexpIndex returns the index of the leftmost such subexpression
354+
// in the regular expression.
//
// The empty name never matches: SubexpIndex("") returns -1, even when the
// regular expression contains unnamed subexpressions.
355+
func (re *Regexp) SubexpIndex(name string) int {
356+
if name != "" { // unnamed groups carry "" as their name, so "" must not be looked up
357+
for i, s := range re.subexpNames {
358+
if name == s {
359+
return i // first hit wins, which is the leftmost group with this name
360+
}
361+
}
362+
}
363+
return -1
364+
}
365+
348366
const endOfText rune = -1
349367

350368
// input abstracts different representations of the input text. It provides

0 commit comments

Comments
 (0)