go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/starlark/builtins/regexp.go (about)

     1  // Copyright 2018 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package builtins
    16  
    17  import (
    18  	"regexp"
    19  	"sync"
    20  
    21  	"go.starlark.net/starlark"
    22  )
    23  
    24  // RegexpMatcher returns a function (with given name) that allows Starlark code
    25  // to do regular expression matches:
    26  //
    27  //	def submatches(pattern, str):
    28  //	  """Returns a tuple of submatches with the leftmost match of the regular
    29  //	  expression.
    30  //
    31  //	  The returned tuple has the full match as a first item, followed by
    32  //	  subexpression matches.
    33  //
    34  //	  If the string doesn't match the expression returns an empty tuple. Fails if
    35  //	  the regular expression can't be compiled.
    36  //	  """
    37  //
    38  // Uses Go regexp engine, which is slightly different from Python's. API also
    39  // explicitly does NOT try to mimic Python's 're' module.
    40  //
    41  // Each separate instance of the builtin holds a cache of compiled regular
    42  // expressions internally. The cache is never cleaned up. Errors are not cached,
    43  // since we don't expect to see them often.
    44  //
    45  // Safe for concurrent use.
    46  func RegexpMatcher(name string) *starlark.Builtin {
    47  	cache := regexpCache{r: make(map[string]*regexp.Regexp)}
    48  	return starlark.NewBuiltin(name, func(_ *starlark.Thread, _ *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
    49  		var pattern, str starlark.String
    50  		err := starlark.UnpackArgs(name, args, kwargs,
    51  			"pattern", &pattern,
    52  			"str", &str,
    53  		)
    54  		if err != nil {
    55  			return nil, err
    56  		}
    57  		groups, err := cache.matches(pattern.GoString(), str.GoString())
    58  		if err != nil {
    59  			return nil, err
    60  		}
    61  		tup := make(starlark.Tuple, len(groups))
    62  		for i, s := range groups {
    63  			tup[i] = starlark.String(s)
    64  		}
    65  		return tup, nil
    66  	})
    67  }
    68  
    69  type regexpCache struct {
    70  	m sync.RWMutex
    71  	r map[string]*regexp.Regexp
    72  }
    73  
    74  func (c *regexpCache) matches(pat, str string) ([]string, error) {
    75  	exp, err := c.exp(pat)
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  	return exp.FindStringSubmatch(str), nil
    80  }
    81  
    82  func (c *regexpCache) exp(pat string) (*regexp.Regexp, error) {
    83  	c.m.RLock()
    84  	exp, _ := c.r[pat]
    85  	c.m.RUnlock()
    86  	if exp != nil {
    87  		return exp, nil
    88  	}
    89  
    90  	c.m.Lock()
    91  	defer c.m.Unlock()
    92  	if exp, _ = c.r[pat]; exp != nil {
    93  		return exp, nil
    94  	}
    95  
    96  	exp, err := regexp.Compile(pat)
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  	c.r[pat] = exp
   101  	return exp, nil
   102  }