github.com/jmigpin/editor@v1.6.0/util/parseutil/scmatch.go (about) 1 package parseutil 2 3 import ( 4 "errors" 5 "fmt" 6 "io" 7 "regexp" 8 "unicode" 9 ) 10 11 // scanner match utility funcs 12 type ScMatch struct { 13 sc *Scanner 14 P *ScParse 15 cache struct { 16 regexps map[string]*regexp.Regexp 17 } 18 } 19 20 func (m *ScMatch) init(sc *Scanner) { 21 m.sc = sc 22 m.P = &sc.P 23 m.cache.regexps = map[string]*regexp.Regexp{} 24 } 25 26 //---------- 27 28 func (m *ScMatch) Eof() bool { 29 pos0 := m.sc.KeepPos() 30 _, err := m.sc.ReadRune() 31 if err == nil { 32 pos0.Restore() 33 return false 34 } 35 return err == io.EOF 36 } 37 38 //---------- 39 40 func (m *ScMatch) Rune(ru rune) error { 41 return m.sc.RestorePosOnErr(func() error { 42 ru2, err := m.sc.ReadRune() 43 if err != nil { 44 return err 45 } 46 if ru2 != ru { 47 return NoMatchErr 48 } 49 return nil 50 }) 51 } 52 func (m *ScMatch) RuneAny(rs []rune) error { // "or", any of the runes 53 return m.sc.RestorePosOnErr(func() error { 54 ru, err := m.sc.ReadRune() 55 if err != nil { 56 return err 57 } 58 if !ContainsRune(rs, ru) { 59 return NoMatchErr 60 } 61 return nil 62 }) 63 } 64 func (m *ScMatch) RuneAnyNot(rs []rune) error { // "or", any of the runes 65 return m.sc.RestorePosOnErr(func() error { 66 ru, err := m.sc.ReadRune() 67 if err != nil { 68 return err 69 } 70 if ContainsRune(rs, ru) { 71 return NoMatchErr 72 } 73 return nil 74 }) 75 } 76 func (m *ScMatch) RuneSequence(seq []rune) error { 77 return m.sc.RestorePosOnErr(func() error { 78 for i, l := 0, len(seq); i < l; i++ { 79 ru := seq[i] 80 if m.sc.Reverse { 81 ru = seq[l-1-i] 82 } 83 84 // NOTE: using spm.Rune() would call keeppos n times 85 86 ru2, err := m.sc.ReadRune() 87 if err != nil { 88 return err 89 } 90 if ru2 != ru { 91 return NoMatchErr 92 } 93 } 94 return nil 95 }) 96 } 97 func (m *ScMatch) RuneSequenceMid(rs []rune) error { 98 return m.sc.RestorePosOnErr(func() error { 99 for k := 0; ; k++ { 100 if err := m.RuneSequence(rs); err == nil { 101 return nil // match 102 } 103 if k+1 >= len(rs) { 104 break 105 } 106 // backup to previous rune to try to match again 107 m.sc.Reverse = !m.sc.Reverse 108 _, err := m.sc.ReadRune() 109 m.sc.Reverse = !m.sc.Reverse 110 if err != nil { 111 return err 112 } 113 } 114 return NoMatchErr 115 }) 116 } 117 func (m *ScMatch) RuneRange(rr RuneRange) error { 118 return m.sc.RestorePosOnErr(func() error { 119 ru, err := m.sc.ReadRune() 120 if err != nil { 121 return err 122 } 123 if !rr.HasRune(ru) { 124 return NoMatchErr 125 } 126 return nil 127 }) 128 } 129 func (m *ScMatch) RuneRangeNot(rr RuneRange) error { // negation 130 return m.sc.RestorePosOnErr(func() error { 131 ru, err := m.sc.ReadRune() 132 if err != nil { 133 return err 134 } 135 if rr.HasRune(ru) { 136 return NoMatchErr 137 } 138 return nil 139 }) 140 } 141 func (m *ScMatch) RunesAndRuneRanges(rs []rune, rrs RuneRanges) error { // negation 142 return m.sc.RestorePosOnErr(func() error { 143 ru, err := m.sc.ReadRune() 144 if err != nil { 145 return err 146 } 147 if !ContainsRune(rs, ru) && !rrs.HasRune(ru) { 148 return NoMatchErr 149 } 150 return nil 151 }) 152 } 153 func (m *ScMatch) RunesAndRuneRangesNot(rs []rune, rrs RuneRanges) error { 154 return m.sc.RestorePosOnErr(func() error { 155 ru, err := m.sc.ReadRune() 156 if err != nil { 157 return err 158 } 159 if ContainsRune(rs, ru) || rrs.HasRune(ru) { 160 return NoMatchErr 161 } 162 return nil 163 }) 164 } 165 166 //---------- 167 168 func (m *ScMatch) RuneFn(fn func(rune) bool) error { 169 pos0 := m.sc.KeepPos() 170 ru, err := m.sc.ReadRune() 171 if err == nil { 172 if !fn(ru) { 173 pos0.Restore() 174 err = NoMatchErr 175 } 176 } 177 return err 178 } 179 180 // one or more 181 func (m *ScMatch) RuneFnLoop(fn func(rune) bool) error { 182 for first := true; ; first = false { 183 if err := m.RuneFn(fn); err != nil { 184 if first { 185 return err 186 } 187 return nil 188 } 189 } 190 } 191 192 //func (m *SMatcher) RuneFnZeroOrMore(fn func(rune) bool) int { 193 // for i := 0; ; i++ { 194 // if err := m.RuneFn(fn); err != nil { 195 // return i 196 // } 197 // } 198 //} 199 //func (m *SMatcher) RuneFnOneOrMore(fn func(rune) bool) error { 200 // return m.LoopRuneFn(fn) 201 202 // if err := m.RuneFn(fn); err != nil { 203 // return err 204 // } 205 // _ = m.RuneFnZeroOrMore(fn) 206 // return nil 207 //} 208 209 //---------- 210 211 func (m *ScMatch) Sequence(seq string) error { 212 return m.RuneSequence([]rune(seq)) 213 } 214 func (m *ScMatch) SequenceMid(seq string) error { 215 return m.RuneSequenceMid([]rune(seq)) 216 } 217 218 //// same as rune sequence, but directly using strings comparison 219 //func (m *ScMatch) Sequence(seq string) error { 220 // if m.sc.Reverse { 221 // return m.RuneSequence([]rune(seq)) 222 // } 223 // l := len(seq) 224 // b := m.sc.Src[m.sc.Pos:] 225 // if l > len(b) { 226 // return NoMatchErr 227 // } 228 // if string(b[:l]) != seq { 229 // return NoMatchErr 230 // } 231 // m.sc.Pos += l 232 // return nil 233 //} 234 235 //---------- 236 237 func (m *ScMatch) RegexpFromStartCached(res string, maxLen int) error { 238 return m.RegexpFromStart(res, true, maxLen) 239 } 240 func (m *ScMatch) RegexpFromStart(res string, cache bool, maxLen int) error { 241 // TODO: reverse 242 243 res = "^(" + res + ")" // from start 244 245 re := (*regexp.Regexp)(nil) 246 if cache { 247 re2, ok := m.cache.regexps[res] 248 if ok { 249 re = re2 250 } 251 } 252 if re == nil { 253 re3, err := regexp.Compile(res) 254 if err != nil { 255 return err 256 } 257 re = re3 258 if cache { 259 m.cache.regexps[res] = re 260 } 261 } 262 263 // limit input to be read 264 src := m.sc.Src[m.sc.Pos:] 265 max := maxLen 266 if max > len(src) { 267 max = len(src) 268 } 269 src = m.sc.Src[m.sc.Pos : m.sc.Pos+max] 270 271 locs := re.FindIndex(src) 272 if len(locs) == 0 { 273 return NoMatchErr 274 } 275 m.sc.Pos += locs[1] 276 return nil 277 } 278 279 //---------- 280 281 func (m *ScMatch) DoubleQuotedString(maxLen int) error { 282 return m.StringSection("\"", '\\', true, maxLen, false) 283 } 284 func (m *ScMatch) QuotedString() error { 285 //return m.QuotedString2('\\', 3000, 10) 286 return m.QuotedString2('\\', 3000, 3000) 287 } 288 289 // allows escaped runes (if esc!=0) 290 func (m *ScMatch) QuotedString2(esc rune, maxLen1, maxLen2 int) error { 291 // doublequote: fail on newline, eof doesn't close 292 if err := m.StringSection("\"", esc, true, maxLen1, false); err == nil { 293 return nil 294 } 295 // singlequote: fail on newline, eof doesn't close (usually a smaller maxlen) 296 if err := m.StringSection("'", esc, true, maxLen2, false); err == nil { 297 return nil 298 } 299 // backquote: can have newline, eof doesn't close 300 if err := m.StringSection("`", esc, false, maxLen1, false); err == nil { 301 return nil 302 } 303 return fmt.Errorf("not a quoted string") 304 } 305 306 func (m *ScMatch) StringSection(openclose string, esc rune, failOnNewline bool, maxLen int, eofClose bool) error { 307 return m.Section(openclose, openclose, esc, failOnNewline, maxLen, eofClose) 308 } 309 310 // match opened/closed sections. 311 func (m *ScMatch) Section(open, close string, esc rune, failOnNewline bool, maxLen int, eofClose bool) error { 312 pos0 := m.sc.Pos 313 return m.sc.RestorePosOnErr(func() error { 314 if err := m.Sequence(open); err != nil { 315 return err 316 } 317 for { 318 if esc != 0 && m.EscapeAny(esc) == nil { 319 continue 320 } 321 if err := m.Sequence(close); err == nil { 322 return nil // ok 323 } 324 // consume rune 325 ru, err := m.sc.ReadRune() 326 if err != nil { 327 // extension: stop on eof 328 if eofClose && err == io.EOF { 329 return nil // ok 330 } 331 332 return err 333 } 334 // extension: stop after maxlength 335 if maxLen > 0 { 336 d := m.sc.Pos - pos0 337 if d < 0 { // handle reverse 338 d = -d 339 } 340 if d > maxLen { 341 return fmt.Errorf("passed maxlen") 342 } 343 } 344 // extension: newline 345 if failOnNewline && ru == '\n' { 346 return fmt.Errorf("found newline") 347 } 348 } 349 }) 350 } 351 352 //---------- 353 354 func (m *ScMatch) EscapeAny(escape rune) error { 355 return m.sc.RestorePosOnErr(func() error { 356 if m.sc.Reverse { 357 if err := m.NRunes(1); err != nil { 358 return err 359 } 360 } 361 if err := m.Rune(escape); err != nil { 362 return err 363 } 364 if !m.sc.Reverse { 365 return m.NRunes(1) 366 } 367 return nil 368 }) 369 } 370 func (m *ScMatch) NRunes(n int) error { 371 pos0 := m.sc.KeepPos() 372 for i := 0; i < n; i++ { 373 _, err := m.sc.ReadRune() 374 if err != nil { 375 pos0.Restore() 376 return err 377 } 378 } 379 return nil 380 } 381 382 //---------- 383 384 func (m *ScMatch) SpacesIncludingNL() bool { 385 err := m.Spaces(true, 0) 386 return err == nil 387 } 388 func (m *ScMatch) SpacesExcludingNL() bool { 389 err := m.Spaces(false, 0) 390 return err == nil 391 } 392 func (m *ScMatch) Spaces(includeNL bool, escape rune) error { 393 for first := true; ; first = false { 394 if escape != 0 { 395 if err := m.EscapeAny(escape); err == nil { 396 continue 397 } 398 } 399 pos0 := m.sc.KeepPos() 400 ru, err := m.sc.ReadRune() 401 if err == nil { 402 valid := unicode.IsSpace(ru) && (includeNL || ru != '\n') 403 if !valid { 404 err = NoMatchErr 405 } 406 } 407 if err != nil { 408 pos0.Restore() 409 if first { 410 return err 411 } 412 return nil 413 } 414 } 415 } 416 417 //---------- 418 419 func (m *ScMatch) And(fns ...ScFn) error { 420 return m.sc.RestorePosOnErr(func() error { 421 if m.sc.Reverse { 422 for i := len(fns) - 1; i >= 0; i-- { 423 fn := fns[i] 424 if fn == nil { 425 continue 426 } 427 if err := fn(); err != nil { 428 return err 429 } 430 } 431 } else { 432 for _, fn := range fns { 433 if fn == nil { 434 continue 435 } 436 if err := fn(); err != nil { 437 return err 438 } 439 } 440 } 441 return nil 442 }) 443 } 444 func (m *ScMatch) Or(fns ...ScFn) error { 445 //me := iout.MultiError{} // TODO: better then first error? 446 firstErr := error(nil) 447 for _, fn := range fns { 448 if fn == nil { 449 continue 450 } 451 pos0 := m.sc.KeepPos() 452 if err := fn(); err != nil { 453 if firstErr == nil { 454 firstErr = err 455 } 456 if IsScFatalError(err) { 457 return err 458 } 459 pos0.Restore() 460 continue 461 } 462 return nil 463 } 464 return firstErr 465 } 466 func (m *ScMatch) Optional(fn ScFn) error { 467 if fn == nil { 468 return nil 469 } 470 pos0 := m.sc.KeepPos() 471 if err := fn(); err != nil { 472 if IsScFatalError(err) { 473 return err 474 } 475 pos0.Restore() 476 } 477 return nil 478 } 479 480 //---------- 481 482 func (m *ScMatch) ToNLExcludeOrEnd(esc rune) int { 483 pos0 := m.sc.KeepPos() 484 valid := func(ru rune) bool { return ru != '\n' } 485 for { 486 if esc != 0 && m.EscapeAny(esc) == nil { 487 continue 488 } 489 if err := m.RuneFn(valid); err == nil { 490 continue 491 } 492 break 493 } 494 return pos0.Len() 495 } 496 func (m *ScMatch) ToNLIncludeOrEnd(esc rune) int { 497 pos0 := m.sc.KeepPos() 498 _ = m.ToNLExcludeOrEnd(esc) 499 _ = m.Rune('\n') 500 return pos0.Len() 501 } 502 503 //---------- 504 505 func (m *ScMatch) Letter() error { 506 return m.RuneFn(unicode.IsLetter) 507 } 508 func (m *ScMatch) Digit() error { 509 return m.RuneFn(unicode.IsDigit) 510 } 511 func (m *ScMatch) Digits() error { 512 return m.RuneFnLoop(unicode.IsDigit) 513 } 514 515 func (m *ScMatch) Integer() error { 516 // TODO: reverse 517 //u := "[+-]?[0-9]+" 518 //return m.RegexpFromStartCached(u) 519 520 return m.And( 521 m.P.Optional(m.sign), 522 m.Digits, 523 ) 524 } 525 526 func (m *ScMatch) Float() error { 527 // TODO: reverse 528 //u := "[+-]?([0-9]*[.])?[0-9]+" 529 //u := "[+-]?(\\d+([.]\\d*)?([eE][+-]?\\d+)?|[.]\\d+([eE][+-]?\\d+)?)" 530 //return m.RegexpFromStartCached(u, 100) 531 532 return m.Or( 533 // -1.2 534 // -1.2e3 535 m.P.And( 536 m.Integer, 537 m.fraction, 538 m.P.Optional(m.exponent), 539 ), 540 // .2 541 // .2e3 542 m.P.And( 543 m.fraction, 544 m.P.Optional(m.exponent), 545 ), 546 ) 547 } 548 549 func (m *ScMatch) sign() error { 550 return m.sc.M.RuneAny([]rune("+-")) 551 } 552 func (m *ScMatch) fraction() error { 553 return m.And( 554 m.P.Rune('.'), 555 m.Digits, 556 ) 557 } 558 func (m *ScMatch) exponent() error { 559 return m.And( 560 m.P.RuneAny([]rune("eE")), 561 m.P.Optional(m.sign), 562 m.Digits, 563 ) 564 } 565 566 //---------- 567 //---------- 568 //---------- 569 570 type RuneRange [2]rune // assume [0]<[1] 571 572 func (rr RuneRange) HasRune(ru rune) bool { 573 return ru >= rr[0] && ru <= rr[1] 574 } 575 func (rr RuneRange) IntersectsRange(rr2 RuneRange) bool { 576 noIntersection := rr2[1] <= rr[0] || rr2[0] > rr[1] 577 return !noIntersection 578 } 579 func (rr RuneRange) String() string { 580 return fmt.Sprintf("%q-%q", rr[0], rr[1]) 581 } 582 583 //---------- 584 //---------- 585 //---------- 586 587 type RuneRanges []RuneRange 588 589 func (rrs RuneRanges) HasRune(ru rune) bool { 590 for _, rr := range rrs { 591 if rr.HasRune(ru) { 592 return true 593 } 594 } 595 return false 596 } 597 598 //---------- 599 //---------- 600 //---------- 601 602 var NoMatchErr = errors.New("no match")