github.com/joomcode/cue@v0.4.4-0.20221111115225-539fe3512047/pkg/regexp/manual.go (about)

     1  // Copyright 2019 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package regexp implements regular expression search.
    16  //
    17  // The syntax of the regular expressions accepted is the same
    18  // general syntax used by Perl, Python, and other languages.
    19  // More precisely, it is the syntax accepted by RE2 and described at
    20  // https://golang.org/s/re2syntax, except for \C.
    21  // For an overview of the syntax, run
    22  //   go doc regexp/syntax
    23  //
    24  // The regexp implementation provided by this package is
    25  // guaranteed to run in time linear in the size of the input.
    26  // (This is a property not guaranteed by most open source
    27  // implementations of regular expressions.) For more information
    28  // about this property, see
    29  //	https://swtch.com/~rsc/regexp/regexp1.html
    30  // or any book about automata theory.
    31  //
    32  // All characters are UTF-8-encoded code points.
    33  //
    34  // The regexp package functions match a regular expression and identify
    35  // the matched text. Their names are matched by this regular expression:
    36  //
    37  //	Find(All)?(Submatch)?
    38  //
    39  // If 'All' is present, the routine matches successive non-overlapping
    40  // matches of the entire expression. Empty matches abutting a preceding
    41  // match are ignored. The return value is a slice containing the successive
    42  // return values of the corresponding non-'All' routine. These routines take
    43  // an extra integer argument, n. If n >= 0, the function returns at most n
    44  // matches/submatches; otherwise, it returns all of them.
    45  //
    46  // If 'Submatch' is present, the return value is a slice identifying the
    47  // successive submatches of the expression. Submatches are matches of
    48  // parenthesized subexpressions (also known as capturing groups) within the
    49  // regular expression, numbered from left to right in order of opening
    50  // parenthesis. Submatch 0 is the match of the entire expression, submatch 1
    51  // the match of the first parenthesized subexpression, and so on.
    52  package regexp
    53  
    54  import (
    55  	"regexp"
    56  
    57  	"github.com/joomcode/cue/cue/errors"
    58  )
    59  
    60  var errNoMatch = errors.New("no match")
    61  
    62  // Find returns a list holding the text of the leftmost match in b of the regular expression.
    63  // A return value of bottom indicates no match.
    64  func Find(pattern, s string) (string, error) {
    65  	re, err := regexp.Compile(pattern)
    66  	if err != nil {
    67  		return "", err
    68  	}
    69  	m := re.FindStringIndex(s)
    70  	if m == nil {
    71  		return "", errNoMatch
    72  	}
    73  	return s[m[0]:m[1]], nil
    74  }
    75  
    76  // FindAll is the 'All' version of Find; it returns a list of all successive
    77  // matches of the expression, as defined by the 'All' description in the
    78  // package comment.
    79  // A return value of bottom indicates no match.
    80  func FindAll(pattern, s string, n int) ([]string, error) {
    81  	re, err := regexp.Compile(pattern)
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  	m := re.FindAllString(s, n)
    86  	if m == nil {
    87  		return nil, errNoMatch
    88  	}
    89  	return m, nil
    90  }
    91  
    92  // FindAllNamedSubmatch is like FindAllSubmatch, but returns a list of maps
    93  // with the named used in capturing groups. See FindNamedSubmatch for an
    94  // example on how to use named groups.
    95  func FindAllNamedSubmatch(pattern, s string, n int) ([]map[string]string, error) {
    96  	re, err := regexp.Compile(pattern)
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  	names := re.SubexpNames()
   101  	if len(names) == 0 {
   102  		return nil, errNoNamedGroup
   103  	}
   104  	m := re.FindAllStringSubmatch(s, n)
   105  	if m == nil {
   106  		return nil, errNoMatch
   107  	}
   108  	result := make([]map[string]string, len(m))
   109  	for i, m := range m {
   110  		r := make(map[string]string, len(names)-1)
   111  		for k, name := range names {
   112  			if name != "" {
   113  				r[name] = m[k]
   114  			}
   115  		}
   116  		result[i] = r
   117  	}
   118  	return result, nil
   119  }
   120  
   121  var errNoNamedGroup = errors.New("no named groups")
   122  
   123  // FindAllSubmatch is the 'All' version of FindSubmatch; it returns a list
   124  // of all successive matches of the expression, as defined by the 'All'
   125  // description in the package comment.
   126  // A return value of bottom indicates no match.
   127  func FindAllSubmatch(pattern, s string, n int) ([][]string, error) {
   128  	re, err := regexp.Compile(pattern)
   129  	if err != nil {
   130  		return nil, err
   131  	}
   132  	m := re.FindAllStringSubmatch(s, n)
   133  	if m == nil {
   134  		return nil, errNoMatch
   135  	}
   136  	return m, nil
   137  }
   138  
   139  // FindNamedSubmatch is like FindSubmatch, but returns a map with the names used
   140  // in capturing groups.
   141  //
   142  // Example:
   143  //     regexp.FindNamedSubmatch(#"Hello (?P<person>\w*)!"#, "Hello World!")
   144  // Output:
   145  //     [{person: "World"}]
   146  //
   147  func FindNamedSubmatch(pattern, s string) (map[string]string, error) {
   148  	re, err := regexp.Compile(pattern)
   149  	if err != nil {
   150  		return nil, err
   151  	}
   152  	names := re.SubexpNames()
   153  	if len(names) == 0 {
   154  		return nil, errNoNamedGroup
   155  	}
   156  	m := re.FindStringSubmatch(s)
   157  	if m == nil {
   158  		return nil, errNoMatch
   159  	}
   160  	r := make(map[string]string, len(names)-1)
   161  	for k, name := range names {
   162  		if name != "" {
   163  			r[name] = m[k]
   164  		}
   165  	}
   166  	return r, nil
   167  }
   168  
   169  // FindSubmatch returns a list of lists holding the text of the leftmost
   170  // match of the regular expression in b and the matches, if any, of its
   171  // subexpressions, as defined by the 'Submatch' descriptions in the package
   172  // comment.
   173  // A return value of bottom indicates no match.
   174  func FindSubmatch(pattern, s string) ([]string, error) {
   175  	re, err := regexp.Compile(pattern)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  	m := re.FindStringSubmatch(s)
   180  	if m == nil {
   181  		return nil, errNoMatch
   182  	}
   183  	return m, nil
   184  }
   185  
   186  // ReplaceAll returns a copy of src, replacing variables in repl with
   187  // corresponding matches drawn from src, according to the following rules.
   188  //
   189  // In the template repl, a variable is denoted by a substring of the form $name
   190  // or ${name}, where name is a non-empty sequence of letters, digits, and
   191  // underscores. A purely numeric name like $1 refers to the submatch with the
   192  // corresponding index; other names refer to capturing parentheses named with
   193  // the (?P<name>...) syntax. A reference to an out of range or unmatched index
   194  // or a name that is not present in the regular expression is replaced with an
   195  // empty slice.
   196  //
   197  // In the $name form, name is taken to be as long as possible: $1x is
   198  // equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
   199  //
   200  // To insert a literal $ in the output, use $$ in the template.
   201  func ReplaceAll(pattern, src, repl string) (string, error) {
   202  	re, err := regexp.Compile(pattern)
   203  	if err != nil {
   204  		return "", err
   205  	}
   206  	return re.ReplaceAllString(src, repl), nil
   207  }
   208  
   209  // ReplaceAllLiteral returns a copy of src, replacing matches of the regexp
   210  // pattern with the replacement string repl. The replacement repl is substituted
   211  // directly.
   212  func ReplaceAllLiteral(pattern, src, repl string) (string, error) {
   213  	re, err := regexp.Compile(pattern)
   214  	if err != nil {
   215  		return "", err
   216  	}
   217  	return re.ReplaceAllLiteralString(src, repl), nil
   218  }
   219  
   220  // Valid reports whether the given regular expression
   221  // is valid.
   222  func Valid(pattern string) (bool, error) {
   223  	_, err := regexp.Compile(pattern)
   224  	return err == nil, err
   225  }