github.com/arnodel/golua@v0.0.0-20230215163904-e0b5347eaaa1/lib/stringlib/pattern/builder.go (about) 1 package pattern 2 3 import ( 4 "errors" 5 "fmt" 6 ) 7 8 const maxPatternSize = 10000 9 10 type patternBuilder struct { 11 items []patternItem 12 ciMax uint64 13 cStack []uint64 14 ptn string 15 i int 16 anchorLeft, anchorRight bool 17 } 18 19 func (pb *patternBuilder) getPattern() (*Pattern, error) { 20 // var anchorLeft, anchorRight bool 21 // if len(pb.ptn) > 0 && pb.ptn[0] == '^' { 22 // anchorLeft = true 23 // pb.ptn = pb.ptn[1:] 24 // } 25 // if last := len(pb.ptn) - 1; last >= 0 && pb.ptn[last] == '$' { 26 // anchorRight = true 27 // pb.ptn = pb.ptn[:last] 28 // } 29 sz := 0 30 for pb.i < len(pb.ptn) { 31 err := pb.getPatternItem() 32 if err != nil { 33 return nil, err 34 } 35 sz++ 36 if sz > maxPatternSize { 37 return nil, errPatternTooComplex 38 } 39 } 40 if len(pb.cStack) != 0 { 41 return nil, errUnfinishedCapture 42 } 43 return &Pattern{ 44 items: pb.items, 45 captureCount: int(pb.ciMax), 46 startAnchor: pb.anchorLeft, 47 endAnchor: pb.anchorRight, 48 }, nil 49 } 50 51 func (pb *patternBuilder) next() (byte, error) { 52 if pb.i >= len(pb.ptn) { 53 return 0, errInvalidPattern 54 } 55 b := pb.ptn[pb.i] 56 pb.i++ 57 return b, nil 58 } 59 60 func (pb *patternBuilder) back() { 61 pb.i-- 62 } 63 64 func (pb *patternBuilder) emit(item patternItem) { 65 pb.items = append(pb.items, item) 66 } 67 68 func (pb *patternBuilder) getPatternItem() error { 69 b, err := pb.next() 70 if err != nil { 71 return err 72 } 73 var s byteSet 74 switch b { 75 case '^': 76 if pb.i == 1 { 77 pb.anchorLeft = true 78 return nil 79 } 80 pb.back() 81 s, err = pb.getCharClass() 82 case '$': 83 if pb.i == len(pb.ptn) { 84 pb.anchorRight = true 85 return nil 86 } 87 pb.back() 88 s, err = pb.getCharClass() 89 case '(': 90 pb.ciMax++ 91 if pb.ciMax >= 10 { 92 return errInvalidPattern 93 } 94 b, err = pb.next() 95 if err != nil { 96 return err 97 } 98 if b != ')' { 99 // Special case: empty capture will generate a position. So we only 100 // emit a ptnStartCapture and skip the ptnEndCapture. The pattern 101 // matcher will then create a capture whose end is -1. 102 pb.back() 103 pb.cStack = append(pb.cStack, pb.ciMax) 104 } 105 pb.emit(patternItem{byteSet{pb.ciMax}, ptnStartCapture}) 106 return nil 107 case ')': 108 i := len(pb.cStack) - 1 109 if i < 0 { 110 return errInvalidPatternCapture 111 } 112 pb.emit(patternItem{byteSet{pb.cStack[i]}, ptnEndCapture}) 113 pb.cStack = pb.cStack[:i] 114 return nil 115 case '%': 116 c, err := pb.next() 117 if err != nil { 118 return err 119 } 120 switch { 121 case c == 'f': 122 s, err := pb.getCharClass() 123 if err == nil { 124 pb.emit(patternItem{s, ptnFrontier}) 125 } 126 return err 127 case c == 'b': 128 op, err := pb.next() 129 if err != nil { 130 return err 131 } 132 cl, err := pb.next() 133 if err != nil { 134 return err 135 } 136 // The doc says op and cl must be different, but the 5.3.4 137 // implementation allows them to be equal. 138 // if op == cl { 139 // return errInvalidPattern 140 // } 141 pb.emit(patternItem{[4]uint64{uint64(op), uint64(cl)}, ptnBalanced}) 142 return nil 143 case c >= '1' && c <= '9': 144 ci := uint64(c - '0') 145 if !pb.checkCapture(ci) { 146 return ErrInvalidCaptureIdx(int(ci)) 147 } 148 pb.emit(patternItem{[4]uint64{ci}, ptnCapture}) 149 return nil 150 default: 151 s, err = getCharRange(c) 152 if err != nil { 153 return err 154 } 155 } 156 default: 157 pb.back() 158 s, err = pb.getCharClass() 159 } 160 if err != nil { 161 return err 162 } 163 b, err = pb.next() 164 ptnType := ptnOnce 165 if err == nil { 166 switch b { 167 case '*': 168 ptnType = ptnGreedyRepeat 169 case '+': 170 ptnType = ptnGreedyRepeatOnce 171 case '-': 172 ptnType = ptnRepeat 173 case '?': 174 ptnType = ptnOptional 175 default: 176 pb.back() 177 } 178 } 179 pb.emit(patternItem{s, ptnType}) 180 return nil 181 } 182 183 func (pb *patternBuilder) checkCapture(ci uint64) bool { 184 if ci > pb.ciMax { 185 return false 186 } 187 for _, sci := range pb.cStack { 188 if sci == ci { 189 return false 190 } 191 } 192 return true 193 } 194 195 func (pb *patternBuilder) getCharClass() (byteSet, error) { 196 b, err := pb.next() 197 if err != nil { 198 return byteSet{}, err 199 } 200 switch b { 201 case '.': 202 return fullSet, nil 203 case '%': 204 b, err := pb.next() 205 if err != nil { 206 return byteSet{}, err 207 } 208 return getCharRange(b) 209 case '[': 210 return pb.getUnion() 211 default: 212 s := byteSet{} 213 s.add(b) 214 return s, nil 215 } 216 } 217 218 func (pb *patternBuilder) getUnion() (s byteSet, err error) { 219 var b byte 220 b, err = pb.next() 221 neg := false 222 // Note: no need to check err if b is not 0 223 if b == '^' { 224 neg = true 225 b, err = pb.next() 226 } 227 if b == ']' { 228 s.add(b) 229 b, err = pb.next() 230 } 231 var r byteSet 232 Loop: 233 for err == nil { 234 switch { 235 case b == ']': 236 if neg { 237 s.complement() 238 } 239 return 240 case b == '%': 241 b, err = pb.next() 242 if err != nil { 243 return 244 } 245 r, err = getCharRange(b) 246 if err != nil { 247 return 248 } 249 s.merge(r) 250 default: 251 c := b 252 b, err = pb.next() 253 if err != nil { 254 return 255 } 256 if b == '-' { 257 b, err = pb.next() 258 if err != nil { 259 return 260 } 261 if b == ']' { 262 s.add(c) 263 s.add('-') 264 continue Loop 265 } 266 s.merge(byteRange(c, b)) 267 } else { 268 s.add(c) 269 continue Loop 270 } 271 } 272 b, err = pb.next() 273 } 274 return 275 } 276 277 func getCharRange(c byte) (byteSet, error) { 278 s, ok := namedByteSet[c] 279 if !ok { 280 switch { 281 case c == '0': 282 return s, ErrInvalidCaptureIdx(0) 283 case (c >= '1' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'): 284 return s, ErrInvalidPct 285 default: 286 s.add(c) 287 } 288 } 289 return s, nil 290 } 291 292 var ErrInvalidPct = errors.New("invalid use of '%'") 293 294 func ErrInvalidCaptureIdx(i int) error { 295 return fmt.Errorf("invalid capture index %%%d", i) 296 }