go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/data/text/intsetexpr/intsetexpr.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package intsetexpr implements parsing of expressions like `a{01..03}b`.
    16  //
    17  // It knows how to expand `a{01..03}b` into `[a01b, a02b, a03b]`.
    18  package intsetexpr
    19  
    20  import (
    21  	"fmt"
    22  	"strconv"
    23  	"strings"
    24  )
    25  
    26  // Expand expands a string with a int set into a list of strings.
    27  //
    28  // For example, given `a{1..3}b` produces `['a1b', 'a2b', 'a3b']`.
    29  //
    30  // The incoming string should have no more than one `{...}` section. If it's
    31  // absent, the function returns the list that contains one item: the original
    32  // string.
    33  //
    34  // The set is given as comma-separated list of terms. Each term is either
    35  // a single non-negative integer (e.g. `9`) or a range (e.g. `1..5`). Both ends
    36  // of the range are inclusive. Ranges where the left hand side is larger than
    37  // the right hand side are not allowed. All elements should be listed in the
    38  // strictly increasing order (e.g. `1,2,5..10` is fine, but `5..10,1,2` is
    39  // not). Spaces are not allowed.
    40  //
    41  // The output integers are padded with zeros to match the width of
    42  // corresponding terms. For ranges this works only if both sides have same
    43  // width. For example, `01,002,03..04` will expand into `01, 002, 03, 04`.
    44  //
    45  // Use `{{` and `}}` to escape `{` and `}` respectively.
    46  func Expand(s string) ([]string, error) {
    47  	// Fast path for strings that do not have sets at all.
    48  	if !strings.ContainsAny(s, "{}") {
    49  		return []string{s}, nil
    50  	}
    51  
    52  	// States for the parser state machine.
    53  	const (
    54  		StateBeforeLB = iota // scanning to find '{'
    55  		StateAfterRB         // after {...} block is read, scanning for end
    56  
    57  		// In comments below '|' denotes the position of the state machine.
    58  
    59  		StateRangeStart  // '{|1..4,5}' or '{|1,2}', expecting to read a number or '}'
    60  		StateCommaOrDots // '{1|..4,5}' or '{1|,2}, expecting either ',' or '..', or '}'
    61  		StateRangeEnd    // '{1..|4,5}', expecting to read a number
    62  		StateComma       // '{1..4|,5}', expecting ',' or '}'
    63  	)
    64  
    65  	// Represents e.g. "10..20", or just "10" if l == r
    66  	type rnge struct {
    67  		l, r uint64
    68  		fmt  string // either %d or e.g. %03d
    69  	}
    70  
    71  	var ranges []rnge     // all read ranges
    72  	var total int         // total number of output strings to expect
    73  	var rangeStart string // for currently constructed range
    74  
    75  	// addRange parses strings into ints and verifies ranges are in the increasing
    76  	// order. 'r' is empty for single-element terms e.g. "{2}".
    77  	addRange := func(l, r string) error {
    78  		li, err := strconv.ParseUint(l, 10, 64)
    79  		if err != nil {
    80  			return fmt.Errorf("integer %q is too large", l)
    81  		}
    82  
    83  		var ri uint64
    84  		if r != "" {
    85  			if ri, err = strconv.ParseUint(r, 10, 64); err != nil {
    86  				return fmt.Errorf("integer %q is too large", r)
    87  			}
    88  			// E.g. "5..2" is a bad range, should be "2..5". Same for "2..2".
    89  			if li >= ri {
    90  				return fmt.Errorf("bad range - %d is not larger than %d", ri, li)
    91  			}
    92  		} else {
    93  			// For e.g. "{2}".
    94  			ri = li
    95  			r = l
    96  		}
    97  
    98  		// E.g. "10,9" is bad, should be "9,10". Same for "9,9".
    99  		if len(ranges) > 0 {
   100  			if min := ranges[len(ranges)-1].r; min >= li {
   101  				return fmt.Errorf("the set is not in increasing order - %d is not larger than %d", li, min)
   102  			}
   103  		}
   104  
   105  		// If both strings have the same length, use it as padding for the output.
   106  		format := "%d"
   107  		if len(l) == len(r) {
   108  			format = fmt.Sprintf("%%0%dd", len(l))
   109  		}
   110  
   111  		ranges = append(ranges, rnge{li, ri, format})
   112  		total += int(ri-li) + 1
   113  		return nil
   114  	}
   115  
   116  	pfx := "" // everything before '{'
   117  	sfx := "" // everything after '}'
   118  
   119  	state := StateBeforeLB
   120  
   121  	for _, tok := range tokenize(s) {
   122  		switch state {
   123  		case StateBeforeLB:
   124  			switch tok.typ {
   125  			case TokLB:
   126  				state = StateRangeStart
   127  			case TokRB:
   128  				return nil, fmt.Errorf(`bad expression - "}" must appear after "{"`)
   129  			default:
   130  				pfx += tok.val
   131  			}
   132  
   133  		case StateAfterRB:
   134  			switch tok.typ {
   135  			case TokLB, TokRB:
   136  				return nil, fmt.Errorf(`bad expression - only one "{...}" section is allowed`)
   137  			default:
   138  				sfx += tok.val
   139  			}
   140  
   141  		case StateRangeStart:
   142  			switch tok.typ {
   143  			case TokNum:
   144  				rangeStart = tok.val
   145  				state = StateCommaOrDots
   146  			case TokRB:
   147  				state = StateAfterRB
   148  			default:
   149  				return nil, fmt.Errorf(`bad expression - expecting a number or "}", got %q`, tok.val)
   150  			}
   151  
   152  		case StateCommaOrDots:
   153  			switch tok.typ {
   154  			case TokComma:
   155  				if err := addRange(rangeStart, ""); err != nil {
   156  					return nil, err
   157  				}
   158  				state = StateRangeStart
   159  			case TokRB:
   160  				if err := addRange(rangeStart, ""); err != nil {
   161  					return nil, err
   162  				}
   163  				state = StateAfterRB
   164  			case TokDots:
   165  				state = StateRangeEnd
   166  			default:
   167  				return nil, fmt.Errorf(`bad expression - expecting ",", ".." or "}", got %q`, tok.val)
   168  			}
   169  
   170  		case StateRangeEnd:
   171  			switch tok.typ {
   172  			case TokNum:
   173  				if err := addRange(rangeStart, tok.val); err != nil {
   174  					return nil, err
   175  				}
   176  				state = StateComma
   177  			default:
   178  				return nil, fmt.Errorf(`bad expression - expecting a number, got %q`, tok.val)
   179  			}
   180  
   181  		case StateComma:
   182  			switch tok.typ {
   183  			case TokComma:
   184  				state = StateRangeStart
   185  			case TokRB:
   186  				state = StateAfterRB
   187  			default:
   188  				return nil, fmt.Errorf(`bad expression - expecting "," or "}", got %q`, tok.val)
   189  			}
   190  
   191  		default:
   192  			panic("impossible")
   193  		}
   194  	}
   195  
   196  	if len(ranges) == 0 {
   197  		return []string{pfx + sfx}, nil
   198  	}
   199  
   200  	out := make([]string, 0, total)
   201  	for _, rng := range ranges {
   202  		for i := rng.l; i <= rng.r; i++ {
   203  			out = append(out, fmt.Sprintf("%s"+rng.fmt+"%s", pfx, i, sfx))
   204  		}
   205  	}
   206  	return out, nil
   207  }
   208  
   209  ////////////////////////////////////////////////////////////////////////////////
   210  // Tokenizer.
   211  
   212  const (
   213  	TokLB    = iota // non-escaped '{'
   214  	TokRB           // non-escaped '}'
   215  	TokNum          // a sequence of digits
   216  	TokRunes        // an arbitrary sequence of non-special runes
   217  	TokComma        // ','
   218  	TokDots         // '..'
   219  )
   220  
   221  type token struct {
   222  	typ int    // one of TOK_* constants
   223  	val string // substring the token was parsed from
   224  }
   225  
   226  func tokenize(s string) (out []token) {
   227  	rs := []rune(s)
   228  
   229  	emit := func(tok int, val string) {
   230  		out = append(out, token{tok, val})
   231  	}
   232  
   233  	for i := 0; i < len(rs); i++ {
   234  		// Advances 'i' util rs[i] matches the predicate.
   235  		readUntil := func(pred func(r rune) bool) string {
   236  			start := i
   237  			for i < len(rs) && pred(rs[i]) {
   238  				i++
   239  			}
   240  			i-- // overstepped
   241  			return string(rs[start : i+1])
   242  		}
   243  
   244  		switch {
   245  		case rs[i] == '{':
   246  			// Escaped '{'?
   247  			if i != len(rs)-1 && rs[i+1] == '{' {
   248  				emit(TokRunes, "{")
   249  				i++ // consumed already
   250  			} else {
   251  				emit(TokLB, "{")
   252  			}
   253  		case rs[i] == '}':
   254  			// Escaped '}'?
   255  			if i != len(rs)-1 && rs[i+1] == '}' {
   256  				emit(TokRunes, "}")
   257  				i++ // consumed already
   258  			} else {
   259  				emit(TokRB, "}")
   260  			}
   261  		case rs[i] == ',':
   262  			emit(TokComma, ",")
   263  		case rs[i] == '.':
   264  			// ".."?
   265  			if i != len(rs)-1 && rs[i+1] == '.' {
   266  				emit(TokDots, "..")
   267  				i++ // consumed already
   268  			} else {
   269  				emit(TokRunes, ".") // regular single dot
   270  			}
   271  		case rs[i] >= '0' && rs[i] <= '9':
   272  			emit(TokNum, readUntil(func(r rune) bool {
   273  				return r >= '0' && r <= '9'
   274  			}))
   275  		default:
   276  			emit(TokRunes, readUntil(func(r rune) bool {
   277  				special := r == '{' ||
   278  					r == '}' ||
   279  					r == ',' ||
   280  					r == '.' ||
   281  					(r >= '0' && r <= '9')
   282  				return !special
   283  			}))
   284  		}
   285  	}
   286  
   287  	return
   288  }