// github.com/joomcode/cue@v0.4.4-0.20221111115225-539fe3512047/pkg/regexp/manual.go

// Copyright 2019 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package regexp implements regular expression search.
//
// The syntax of the regular expressions accepted is the same
// general syntax used by Perl, Python, and other languages.
// More precisely, it is the syntax accepted by RE2 and described at
// https://golang.org/s/re2syntax, except for \C.
// For an overview of the syntax, run
//	go doc regexp/syntax
//
// The regexp implementation provided by this package is
// guaranteed to run in time linear in the size of the input.
// (This is a property not guaranteed by most open source
// implementations of regular expressions.) For more information
// about this property, see
//	https://swtch.com/~rsc/regexp/regexp1.html
// or any book about automata theory.
//
// All characters are UTF-8-encoded code points.
//
// The regexp package functions match a regular expression and identify
// the matched text. Their names are matched by this regular expression:
//
//	Find(All)?(Submatch)?
//
// If 'All' is present, the routine matches successive non-overlapping
// matches of the entire expression. Empty matches abutting a preceding
// match are ignored.
The return value is a slice containing the successive 42 // return values of the corresponding non-'All' routine. These routines take 43 // an extra integer argument, n. If n >= 0, the function returns at most n 44 // matches/submatches; otherwise, it returns all of them. 45 // 46 // If 'Submatch' is present, the return value is a slice identifying the 47 // successive submatches of the expression. Submatches are matches of 48 // parenthesized subexpressions (also known as capturing groups) within the 49 // regular expression, numbered from left to right in order of opening 50 // parenthesis. Submatch 0 is the match of the entire expression, submatch 1 51 // the match of the first parenthesized subexpression, and so on. 52 package regexp 53 54 import ( 55 "regexp" 56 57 "github.com/joomcode/cue/cue/errors" 58 ) 59 60 var errNoMatch = errors.New("no match") 61 62 // Find returns a list holding the text of the leftmost match in b of the regular expression. 63 // A return value of bottom indicates no match. 64 func Find(pattern, s string) (string, error) { 65 re, err := regexp.Compile(pattern) 66 if err != nil { 67 return "", err 68 } 69 m := re.FindStringIndex(s) 70 if m == nil { 71 return "", errNoMatch 72 } 73 return s[m[0]:m[1]], nil 74 } 75 76 // FindAll is the 'All' version of Find; it returns a list of all successive 77 // matches of the expression, as defined by the 'All' description in the 78 // package comment. 79 // A return value of bottom indicates no match. 80 func FindAll(pattern, s string, n int) ([]string, error) { 81 re, err := regexp.Compile(pattern) 82 if err != nil { 83 return nil, err 84 } 85 m := re.FindAllString(s, n) 86 if m == nil { 87 return nil, errNoMatch 88 } 89 return m, nil 90 } 91 92 // FindAllNamedSubmatch is like FindAllSubmatch, but returns a list of maps 93 // with the named used in capturing groups. See FindNamedSubmatch for an 94 // example on how to use named groups. 
95 func FindAllNamedSubmatch(pattern, s string, n int) ([]map[string]string, error) { 96 re, err := regexp.Compile(pattern) 97 if err != nil { 98 return nil, err 99 } 100 names := re.SubexpNames() 101 if len(names) == 0 { 102 return nil, errNoNamedGroup 103 } 104 m := re.FindAllStringSubmatch(s, n) 105 if m == nil { 106 return nil, errNoMatch 107 } 108 result := make([]map[string]string, len(m)) 109 for i, m := range m { 110 r := make(map[string]string, len(names)-1) 111 for k, name := range names { 112 if name != "" { 113 r[name] = m[k] 114 } 115 } 116 result[i] = r 117 } 118 return result, nil 119 } 120 121 var errNoNamedGroup = errors.New("no named groups") 122 123 // FindAllSubmatch is the 'All' version of FindSubmatch; it returns a list 124 // of all successive matches of the expression, as defined by the 'All' 125 // description in the package comment. 126 // A return value of bottom indicates no match. 127 func FindAllSubmatch(pattern, s string, n int) ([][]string, error) { 128 re, err := regexp.Compile(pattern) 129 if err != nil { 130 return nil, err 131 } 132 m := re.FindAllStringSubmatch(s, n) 133 if m == nil { 134 return nil, errNoMatch 135 } 136 return m, nil 137 } 138 139 // FindNamedSubmatch is like FindSubmatch, but returns a map with the names used 140 // in capturing groups. 
141 // 142 // Example: 143 // regexp.FindNamedSubmatch(#"Hello (?P<person>\w*)!"#, "Hello World!") 144 // Output: 145 // [{person: "World"}] 146 // 147 func FindNamedSubmatch(pattern, s string) (map[string]string, error) { 148 re, err := regexp.Compile(pattern) 149 if err != nil { 150 return nil, err 151 } 152 names := re.SubexpNames() 153 if len(names) == 0 { 154 return nil, errNoNamedGroup 155 } 156 m := re.FindStringSubmatch(s) 157 if m == nil { 158 return nil, errNoMatch 159 } 160 r := make(map[string]string, len(names)-1) 161 for k, name := range names { 162 if name != "" { 163 r[name] = m[k] 164 } 165 } 166 return r, nil 167 } 168 169 // FindSubmatch returns a list of lists holding the text of the leftmost 170 // match of the regular expression in b and the matches, if any, of its 171 // subexpressions, as defined by the 'Submatch' descriptions in the package 172 // comment. 173 // A return value of bottom indicates no match. 174 func FindSubmatch(pattern, s string) ([]string, error) { 175 re, err := regexp.Compile(pattern) 176 if err != nil { 177 return nil, err 178 } 179 m := re.FindStringSubmatch(s) 180 if m == nil { 181 return nil, errNoMatch 182 } 183 return m, nil 184 } 185 186 // ReplaceAll returns a copy of src, replacing variables in repl with 187 // corresponding matches drawn from src, according to the following rules. 188 // 189 // In the template repl, a variable is denoted by a substring of the form $name 190 // or ${name}, where name is a non-empty sequence of letters, digits, and 191 // underscores. A purely numeric name like $1 refers to the submatch with the 192 // corresponding index; other names refer to capturing parentheses named with 193 // the (?P<name>...) syntax. A reference to an out of range or unmatched index 194 // or a name that is not present in the regular expression is replaced with an 195 // empty slice. 
//
// In the $name form, name is taken to be as long as possible: $1x is
// equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
//
// To insert a literal $ in the output, use $$ in the template.
func ReplaceAll(pattern, src, repl string) (string, error) {
	var out string
	compiled, err := regexp.Compile(pattern)
	if err == nil {
		out = compiled.ReplaceAllString(src, repl)
	}
	// On a compile failure out is still the empty string.
	return out, err
}

// ReplaceAllLiteral returns a copy of src in which every match of the regular
// expression pattern is replaced by repl. The replacement is inserted
// verbatim: no $-style variable expansion is performed on repl.
//
// An error is returned if pattern does not compile.
func ReplaceAllLiteral(pattern, src, repl string) (string, error) {
	var out string
	compiled, err := regexp.Compile(pattern)
	if err == nil {
		out = compiled.ReplaceAllLiteralString(src, repl)
	}
	// On a compile failure out is still the empty string.
	return out, err
}

// Valid reports whether pattern compiles as a regular expression. For an
// invalid pattern it returns false together with the compile error.
func Valid(pattern string) (bool, error) {
	if _, err := regexp.Compile(pattern); err != nil {
		return false, err
	}
	return true, nil
}