github.com/markusbkk/elvish@v0.0.0-20231204143114-91dc52438621/pkg/mods/re/re.go (about)

     1  // Package re implements a regular expression module.
     2  package re
     3  
     4  import (
     5  	"regexp"
     6  
     7  	"github.com/markusbkk/elvish/pkg/eval"
     8  	"github.com/markusbkk/elvish/pkg/eval/errs"
     9  	"github.com/markusbkk/elvish/pkg/eval/vals"
    10  )
    11  
    12  // Ns is the namespace for the re: module.
    13  var Ns = eval.BuildNsNamed("re").
    14  	AddGoFns(map[string]interface{}{
    15  		"quote":   regexp.QuoteMeta,
    16  		"match":   match,
    17  		"find":    find,
    18  		"replace": replace,
    19  		"split":   split,
    20  	}).Ns()
    21  
    22  //elvdoc:fn quote
    23  //
    24  // ```elvish
    25  // re:quote $string
    26  // ```
    27  //
    28  // Quote `$string` for use in a pattern. Examples:
    29  //
    30  // ```elvish-transcript
    31  // ~> re:quote a.txt
    32  // ▶ a\.txt
    33  // ~> re:quote '(*)'
    34  // ▶ '\(\*\)'
    35  // ```
    36  
    37  //elvdoc:fn match
    38  //
    39  // ```elvish
    40  // re:match &posix=$false $pattern $source
    41  // ```
    42  //
    43  // Determine whether `$pattern` matches `$source`. The pattern is not anchored.
    44  // Examples:
    45  //
    46  // ```elvish-transcript
    47  // ~> re:match . xyz
    48  // ▶ $true
    49  // ~> re:match . ''
    50  // ▶ $false
    51  // ~> re:match '[a-z]' A
    52  // ▶ $false
    53  // ```
    54  
    55  type matchOpts struct{ Posix bool }
    56  
    57  func (*matchOpts) SetDefaultOptions() {}
    58  
    59  func match(opts matchOpts, argPattern, source string) (bool, error) {
    60  	pattern, err := makePattern(argPattern, opts.Posix, false)
    61  	if err != nil {
    62  		return false, err
    63  	}
    64  	return pattern.MatchString(source), nil
    65  }
    66  
    67  //elvdoc:fn find
    68  //
    69  // ```elvish
    70  // re:find &posix=$false &longest=$false &max=-1 $pattern $source
    71  // ```
    72  //
    73  // Find all matches of `$pattern` in `$source`.
    74  //
    75  // Each match is represented by a map-like value `$m`; `$m[text]`, `$m[start]` and
    76  // `$m[end]` are the text, start and end positions (as byte indices into `$source`)
    77  // of the match; `$m[groups]` is a list of submatches for capture groups in the
    78  // pattern. A submatch has a similar structure to a match, except that it does not
    79  // have a `groups` key. The entire pattern is an implicit capture group, and it
    80  // always appears first.
    81  //
    82  // Examples:
    83  //
    84  // ```elvish-transcript
    85  // ~> re:find . ab
    86  // ▶ [&text=a &start=0 &end=1 &groups=[[&text=a &start=0 &end=1]]]
    87  // ▶ [&text=b &start=1 &end=2 &groups=[[&text=b &start=1 &end=2]]]
    88  // ~> re:find '[A-Z]([0-9])' 'A1 B2'
    89  // ▶ [&text=A1 &start=0 &end=2 &groups=[[&text=A1 &start=0 &end=2] [&text=1 &start=1 &end=2]]]
    90  // ▶ [&text=B2 &start=3 &end=5 &groups=[[&text=B2 &start=3 &end=5] [&text=2 &start=4 &end=5]]]
    91  // ```
    92  
    93  // Struct for holding options to find. Also used by split.
    94  type findOpts struct {
    95  	Posix   bool
    96  	Longest bool
    97  	Max     int
    98  }
    99  
   100  func (o *findOpts) SetDefaultOptions() { o.Max = -1 }
   101  
   102  func find(fm *eval.Frame, opts findOpts, argPattern, source string) error {
   103  	out := fm.ValueOutput()
   104  
   105  	pattern, err := makePattern(argPattern, opts.Posix, opts.Longest)
   106  	if err != nil {
   107  		return err
   108  	}
   109  	matches := pattern.FindAllSubmatchIndex([]byte(source), opts.Max)
   110  
   111  	for _, match := range matches {
   112  		start, end := match[0], match[1]
   113  		groups := vals.EmptyList
   114  		for i := 0; i < len(match); i += 2 {
   115  			start, end := match[i], match[i+1]
   116  			text := ""
   117  			// FindAllSubmatchIndex may return negative indices to indicate
   118  			// that the pattern didn't appear in the text.
   119  			if start >= 0 && end >= 0 {
   120  				text = source[start:end]
   121  			}
   122  			groups = groups.Conj(submatchStruct{text, start, end})
   123  		}
   124  		err := out.Put(matchStruct{source[start:end], start, end, groups})
   125  		if err != nil {
   126  			return err
   127  		}
   128  	}
   129  	return nil
   130  }
   131  
   132  //elvdoc:fn replace
   133  //
   134  // ```elvish
   135  // re:replace &posix=$false &longest=$false &literal=$false $pattern $repl $source
   136  // ```
   137  //
   138  // Replace all occurrences of `$pattern` in `$source` with `$repl`.
   139  //
   140  // The replacement `$repl` can be any of the following:
   141  //
   142  // -   A string-typed replacement template. The template can use `$name` or
   143  //     `${name}` patterns to refer to capture groups, where `name` consists of
   144  //     letters, digits and underscores. A purely numeric pattern like `$1`
   145  //     refers to the capture group with the corresponding index; other names
   146  //     refer to capture groups named with the `(?P<name>...)` syntax.
   147  //
   148  //     In the `$name` form, the name is taken to be as long as possible; `$1x` is
   149  //     equivalent to `${1x}`, not `${1}x`; `$10` is equivalent to `${10}`, not `${1}0`.
   150  //
   151  //     To insert a literal `$`, use `$$`.
   152  //
   153  // -   A function that takes a string argument and outputs a string. For each
   154  //     match, the function is called with the content of the match, and its output
   155  //     is used as the replacement.
   156  //
   157  // If `$literal` is true, `$repl` must be a string and is treated literally instead
   158  // of as a pattern.
   159  //
   160  // Example:
   161  //
   162  // ```elvish-transcript
   163  // ~> re:replace '(ba|z)sh' '${1}SH' 'bash and zsh'
   164  // ▶ 'baSH and zSH'
   165  // ~> re:replace '(ba|z)sh' elvish 'bash and zsh rock'
   166  // ▶ 'elvish and elvish rock'
   167  // ~> re:replace '(ba|z)sh' {|x| put [&bash=BaSh &zsh=ZsH][$x] } 'bash and zsh'
   168  // ▶ 'BaSh and ZsH'
   169  // ```
   170  
   171  type replaceOpts struct {
   172  	Posix   bool
   173  	Longest bool
   174  	Literal bool
   175  }
   176  
   177  func (*replaceOpts) SetDefaultOptions() {}
   178  
   179  func replace(fm *eval.Frame, opts replaceOpts, argPattern string, argRepl interface{}, source string) (string, error) {
   180  
   181  	pattern, err := makePattern(argPattern, opts.Posix, opts.Longest)
   182  	if err != nil {
   183  		return "", err
   184  	}
   185  
   186  	if opts.Literal {
   187  		repl, ok := argRepl.(string)
   188  		if !ok {
   189  			return "", &errs.BadValue{What: "literal replacement",
   190  				Valid: "string", Actual: vals.Kind(argRepl)}
   191  		}
   192  		return pattern.ReplaceAllLiteralString(source, repl), nil
   193  	}
   194  	switch repl := argRepl.(type) {
   195  	case string:
   196  		return pattern.ReplaceAllString(source, repl), nil
   197  	case eval.Callable:
   198  		var errReplace error
   199  		replFunc := func(s string) string {
   200  			if errReplace != nil {
   201  				return ""
   202  			}
   203  			values, err := fm.CaptureOutput(func(fm *eval.Frame) error {
   204  				return repl.Call(fm, []interface{}{s}, eval.NoOpts)
   205  			})
   206  			if err != nil {
   207  				errReplace = err
   208  				return ""
   209  			}
   210  			if len(values) != 1 {
   211  				errReplace = &errs.ArityMismatch{What: "replacement function output",
   212  					ValidLow: 1, ValidHigh: 1, Actual: len(values)}
   213  				return ""
   214  			}
   215  			output, ok := values[0].(string)
   216  			if !ok {
   217  				errReplace = &errs.BadValue{What: "replacement function output",
   218  					Valid: "string", Actual: vals.Kind(values[0])}
   219  				return ""
   220  			}
   221  			return output
   222  		}
   223  		return pattern.ReplaceAllStringFunc(source, replFunc), errReplace
   224  	default:
   225  		return "", &errs.BadValue{What: "replacement",
   226  			Valid: "string or function", Actual: vals.Kind(argRepl)}
   227  	}
   228  }
   229  
   230  //elvdoc:fn split
   231  //
   232  // ```elvish
   233  // re:split &posix=$false &longest=$false &max=-1 $pattern $source
   234  // ```
   235  //
   236  // Split `$source`, using `$pattern` as separators. Examples:
   237  //
   238  // ```elvish-transcript
   239  // ~> re:split : /usr/sbin:/usr/bin:/bin
   240  // ▶ /usr/sbin
   241  // ▶ /usr/bin
   242  // ▶ /bin
   243  // ~> re:split &max=2 : /usr/sbin:/usr/bin:/bin
   244  // ▶ /usr/sbin
   245  // ▶ /usr/bin:/bin
   246  // ```
   247  
   248  func split(fm *eval.Frame, opts findOpts, argPattern, source string) error {
   249  	out := fm.ValueOutput()
   250  
   251  	pattern, err := makePattern(argPattern, opts.Posix, opts.Longest)
   252  	if err != nil {
   253  		return err
   254  	}
   255  
   256  	pieces := pattern.Split(source, opts.Max)
   257  	for _, piece := range pieces {
   258  		err := out.Put(piece)
   259  		if err != nil {
   260  			return err
   261  		}
   262  	}
   263  	return nil
   264  }
   265  
   266  func makePattern(p string, posix, longest bool) (*regexp.Regexp, error) {
   267  	pattern, err := compile(p, posix)
   268  	if err != nil {
   269  		return nil, err
   270  	}
   271  	if longest {
   272  		pattern.Longest()
   273  	}
   274  	return pattern, nil
   275  }
   276  
   277  func compile(pattern string, posix bool) (*regexp.Regexp, error) {
   278  	if posix {
   279  		return regexp.CompilePOSIX(pattern)
   280  	}
   281  	return regexp.Compile(pattern)
   282  }