go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/data/text/intsetexpr/intsetexpr.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package intsetexpr implements parsing of expressions like `a{01..03}b`. 16 // 17 // It knows how to expand `a{01..03}b` into `[a01b, a02b, a03b]`. 18 package intsetexpr 19 20 import ( 21 "fmt" 22 "strconv" 23 "strings" 24 ) 25 26 // Expand expands a string with a int set into a list of strings. 27 // 28 // For example, given `a{1..3}b` produces `['a1b', 'a2b', 'a3b']`. 29 // 30 // The incoming string should have no more than one `{...}` section. If it's 31 // absent, the function returns the list that contains one item: the original 32 // string. 33 // 34 // The set is given as comma-separated list of terms. Each term is either 35 // a single non-negative integer (e.g. `9`) or a range (e.g. `1..5`). Both ends 36 // of the range are inclusive. Ranges where the left hand side is larger than 37 // the right hand side are not allowed. All elements should be listed in the 38 // strictly increasing order (e.g. `1,2,5..10` is fine, but `5..10,1,2` is 39 // not). Spaces are not allowed. 40 // 41 // The output integers are padded with zeros to match the width of 42 // corresponding terms. For ranges this works only if both sides have same 43 // width. For example, `01,002,03..04` will expand into `01, 002, 03, 04`. 
//
// Use `{{` and `}}` to escape `{` and `}` respectively.
//
// An error is returned when the expression is malformed: a `}` before `{`,
// more than one `{...}` section, a `{` that is never closed, an unexpected
// token inside the set, an integer that does not fit into uint64, terms that
// are not in strictly increasing order, or a set with more elements than fit
// into an int. On error the returned slice is nil.
//
// All output strings are built in memory at once, so very wide ranges are
// expensive even when they are accepted.
func Expand(s string) ([]string, error) {
	// Fast path for strings that do not have sets at all.
	if !strings.ContainsAny(s, "{}") {
		return []string{s}, nil
	}

	// States for the parser state machine.
	const (
		StateBeforeLB = iota // scanning to find '{'
		StateAfterRB         // after {...} block is read, scanning for end

		// In comments below '|' denotes the position of the state machine.

		StateRangeStart  // '{|1..4,5}' or '{|1,2}', expecting to read a number or '}'
		StateCommaOrDots // '{1|..4,5}' or '{1|,2}, expecting either ',' or '..', or '}'
		StateRangeEnd    // '{1..|4,5}', expecting to read a number
		StateComma       // '{1..4|,5}', expecting ',' or '}'
	)

	// Largest value representable by int on this platform; bounds the number of
	// output strings since they are collected into a slice.
	const maxInt = int(^uint(0) >> 1)

	// Represents e.g. "10..20", or just "10" if l == r.
	type rnge struct {
		l, r   uint64
		format string // either %d or e.g. %03d
	}

	var ranges []rnge     // all read ranges
	var total int         // total number of output strings to expect
	var rangeStart string // for currently constructed range

	// addRange parses strings into ints and verifies ranges are in the increasing
	// order. 'r' is empty for single-element terms e.g. "{2}".
	addRange := func(l, r string) error {
		// The tokenizer only produces digit sequences here, so the only way
		// parsing can fail is a value that does not fit into uint64.
		li, err := strconv.ParseUint(l, 10, 64)
		if err != nil {
			return fmt.Errorf("integer %q is too large", l)
		}

		ri := li
		if r == "" {
			// For e.g. "{2}": a single element is a range of width one.
			r = l
		} else {
			if ri, err = strconv.ParseUint(r, 10, 64); err != nil {
				return fmt.Errorf("integer %q is too large", r)
			}
			// E.g. "5..2" is a bad range, should be "2..5". Same for "2..2".
			if li >= ri {
				return fmt.Errorf("bad range - %d is not larger than %d", ri, li)
			}
		}

		// E.g. "10,9" is bad, should be "9,10". Same for "9,9".
		if n := len(ranges); n > 0 {
			if prev := ranges[n-1].r; prev >= li {
				return fmt.Errorf("the set is not in increasing order - %d is not larger than %d", li, prev)
			}
		}

		// If both strings have the same length, use it as padding for the output.
		format := "%d"
		if len(l) == len(r) {
			format = fmt.Sprintf("%%0%dd", len(l))
		}

		// The range holds span+1 elements. Compare against the remaining
		// headroom instead of adding first, so neither the uint64 nor the int
		// arithmetic can wrap around.
		span := ri - li
		if span >= uint64(maxInt-total) {
			return fmt.Errorf("the set is too large - it has more than %d elements", maxInt)
		}

		ranges = append(ranges, rnge{li, ri, format})
		total += int(span) + 1
		return nil
	}

	var pfx strings.Builder // everything before '{'
	var sfx strings.Builder // everything after '}'

	state := StateBeforeLB

	for _, tok := range tokenize(s) {
		switch state {
		case StateBeforeLB:
			switch tok.typ {
			case TokLB:
				state = StateRangeStart
			case TokRB:
				return nil, fmt.Errorf(`bad expression - "}" must appear after "{"`)
			default:
				pfx.WriteString(tok.val)
			}

		case StateAfterRB:
			switch tok.typ {
			case TokLB, TokRB:
				return nil, fmt.Errorf(`bad expression - only one "{...}" section is allowed`)
			default:
				sfx.WriteString(tok.val)
			}

		case StateRangeStart:
			switch tok.typ {
			case TokNum:
				rangeStart = tok.val
				state = StateCommaOrDots
			case TokRB:
				state = StateAfterRB
			default:
				return nil, fmt.Errorf(`bad expression - expecting a number or "}", got %q`, tok.val)
			}

		case StateCommaOrDots:
			switch tok.typ {
			case TokComma:
				if err := addRange(rangeStart, ""); err != nil {
					return nil, err
				}
				state = StateRangeStart
			case TokRB:
				if err := addRange(rangeStart, ""); err != nil {
					return nil, err
				}
				state = StateAfterRB
			case TokDots:
				state = StateRangeEnd
			default:
				return nil, fmt.Errorf(`bad expression - expecting ",", ".." or "}", got %q`, tok.val)
			}

		case StateRangeEnd:
			switch tok.typ {
			case TokNum:
				if err := addRange(rangeStart, tok.val); err != nil {
					return nil, err
				}
				state = StateComma
			default:
				return nil, fmt.Errorf(`bad expression - expecting a number, got %q`, tok.val)
			}

		case StateComma:
			switch tok.typ {
			case TokComma:
				state = StateRangeStart
			case TokRB:
				state = StateAfterRB
			default:
				return nil, fmt.Errorf(`bad expression - expecting "," or "}", got %q`, tok.val)
			}

		default:
			panic("impossible")
		}
	}

	// Running out of input while still inside the set means "{" was never
	// closed. Accepting it would silently drop the term being read.
	if state != StateBeforeLB && state != StateAfterRB {
		return nil, fmt.Errorf(`bad expression - "{" is not closed with "}"`)
	}

	prefix, suffix := pfx.String(), sfx.String()

	// No terms at all (e.g. only escaped braces, or an empty "{}").
	if len(ranges) == 0 {
		return []string{prefix + suffix}, nil
	}

	out := make([]string, 0, total)
	for _, rng := range ranges {
		layout := "%s" + rng.format + "%s"
		// Terminate on equality rather than `i <= rng.r`: when rng.r is the
		// maximum uint64, incrementing wraps to 0 and `<=` would never be false.
		for i := rng.l; ; i++ {
			out = append(out, fmt.Sprintf(layout, prefix, i, suffix))
			if i == rng.r {
				break
			}
		}
	}
	return out, nil
}

////////////////////////////////////////////////////////////////////////////////
// Tokenizer.

// Token types produced by tokenize.
const (
	TokLB    = iota // non-escaped '{'
	TokRB           // non-escaped '}'
	TokNum          // a sequence of digits
	TokRunes        // an arbitrary sequence of non-special runes
	TokComma        // ','
	TokDots         // '..'
)

// token is a single lexical element of an expression.
type token struct {
	typ int    // one of Tok* constants
	val string // text of the token; for escaped braces this is the single unescaped brace
}

// tokenize splits s into a sequence of tokens.
//
// Doubled braces (`{{`, `}}`) are emitted as TokRunes holding a single brace,
// a lone '.' is emitted as TokRunes, and ASCII digits are grouped into TokNum
// regardless of whether they appear inside a `{...}` section.
func tokenize(s string) (out []token) {
	rs := []rune(s)

	emit := func(typ int, val string) {
		out = append(out, token{typ, val})
	}

	isDigit := func(r rune) bool {
		return r >= '0' && r <= '9'
	}

	// isPlain reports whether r has no special meaning to the tokenizer.
	isPlain := func(r rune) bool {
		return r != '{' && r != '}' && r != ',' && r != '.' && !isDigit(r)
	}

	// scan returns the index just past the run of runes that starts at 'start'
	// and in which every rune satisfies pred.
	scan := func(start int, pred func(r rune) bool) int {
		end := start
		for end < len(rs) && pred(rs[end]) {
			end++
		}
		return end
	}

	for i := 0; i < len(rs); i++ {
		r := rs[i]
		// Whether the current rune is immediately followed by the same rune.
		doubled := i+1 < len(rs) && rs[i+1] == r

		switch {
		case r == '{' && doubled:
			emit(TokRunes, "{") // escaped '{'
			i++                 // the second rune is consumed as well
		case r == '{':
			emit(TokLB, "{")
		case r == '}' && doubled:
			emit(TokRunes, "}") // escaped '}'
			i++                 // the second rune is consumed as well
		case r == '}':
			emit(TokRB, "}")
		case r == ',':
			emit(TokComma, ",")
		case r == '.' && doubled:
			emit(TokDots, "..")
			i++ // the second rune is consumed as well
		case r == '.':
			emit(TokRunes, ".") // regular single dot
		case isDigit(r):
			end := scan(i, isDigit)
			emit(TokNum, string(rs[i:end]))
			i = end - 1 // the loop increment moves past the run
		default:
			end := scan(i, isPlain)
			emit(TokRunes, string(rs[i:end]))
			i = end - 1 // the loop increment moves past the run
		}
	}

	return out
}