9fans.net/go@v0.0.7/cmd/acme/internal/regx/regx.go (about) 1 // #include <u.h> 2 // #include <libc.h> 3 // #include <draw.h> 4 // #include <thread.h> 5 // #include <cursor.h> 6 // #include <mouse.h> 7 // #include <keyboard.h> 8 // #include <frame.h> 9 // #include <fcall.h> 10 // #include <plumb.h> 11 // #include <libsec.h> 12 // #include "dat.h" 13 // #include "fns.h" 14 15 package regx 16 17 import ( 18 "fmt" 19 "runtime" 20 "unicode/utf8" 21 22 "9fans.net/go/cmd/acme/internal/alog" 23 "9fans.net/go/cmd/acme/internal/runes" 24 "9fans.net/go/cmd/acme/internal/util" 25 ) 26 27 // var sel Rangeset - in ecmd.go 28 var lastregexp []rune 29 30 // #undef class 31 // #define class regxclass /* some systems declare "class" in system headers */ 32 33 /* 34 * Machine Information 35 */ 36 37 type Inst struct { 38 typ rune 39 40 // former union 41 subid int 42 rclass int 43 right *Inst 44 45 // former union 46 next *Inst 47 } 48 49 const NPROG = 1024 50 51 var program [NPROG]Inst 52 var progp int 53 var startinst *Inst /* First inst. of program; might not be program[0] */ 54 var bstartinst *Inst /* same for backwards machine */ 55 var rechan chan *Inst 56 57 type Ilist struct { 58 inst *Inst 59 se Ranges 60 startp int 61 } 62 63 const NLIST = 127 64 65 var tl []Ilist 66 var nl []Ilist /* This list, next list */ 67 var list [2][NLIST + 1]Ilist /* +1 for trailing null */ 68 var sempty Ranges 69 70 /* 71 * Actions and Tokens 72 * 73 * 0x10000xx are operators, value == precedence 74 * 0x20000xx are tokens, i.e. operands for operators 75 */ 76 const ( 77 OPERATOR = 0x1000000 /* Bit set in all operators */ 78 START = OPERATOR + 0 /* Start, used for marker on stack */ 79 RBRA = OPERATOR + 1 /* Right bracket, */ 80 LBRA = OPERATOR + 2 /* Left bracket, */ 81 OR = OPERATOR + 3 /* Alternation, | */ 82 CAT = OPERATOR + 4 /* Concatentation, implicit operator */ 83 STAR = OPERATOR + 5 /* Closure, * */ 84 PLUS = OPERATOR + 6 /* a+ == aa* */ 85 QUEST = OPERATOR + 7 /* a? == a|nothing, i.e. 0 or 1 a's */ 86 ANY = 0x2000000 /* Any character but newline, . */ 87 NOP = ANY + 1 /* No operation, internal use only */ 88 BOL = ANY + 2 /* Beginning of line, ^ */ 89 EOL = ANY + 3 /* End of line, $ */ 90 CCLASS = ANY + 4 /* Character class, [] */ 91 NCCLASS = ANY + 5 /* Negated character class, [^] */ 92 END = ANY + 0x77 /* Terminate: match found */ 93 94 ISATOR = OPERATOR 95 ISAND = ANY 96 97 QUOTED = 0x4000000 /* Bit set for \-ed lex characters */ 98 ) 99 100 /* 101 * Parser Information 102 */ 103 104 type Node struct { 105 first *Inst 106 last *Inst 107 } 108 109 const NSTACK = 20 110 111 var andstack [NSTACK]Node 112 var andp int 113 var atorstack [NSTACK]int 114 var atorp int 115 var lastwasand bool /* Last token was operand */ 116 var cursubid int 117 var subidstack [NSTACK]int 118 var subidp int 119 var backwards bool 120 var nbra int 121 var exprp []rune /* pointer to next character in source expression */ 122 const DCLASS = 10 /* allocation increment */ 123 var class [][]rune 124 var negateclass bool 125 126 func Init() { 127 rechan = make(chan *Inst) 128 } 129 130 func regerror(e string) { 131 lastregexp = lastregexp[:0] 132 alog.Printf("regexp: %s\n", e) 133 rechan <- nil 134 runtime.Goexit() // TODO(rsc) 135 } 136 137 func newinst(t rune) *Inst { 138 if progp >= NPROG { 139 regerror("expression too long") 140 } 141 p := &program[progp] 142 progp++ 143 *p = Inst{} 144 p.typ = t 145 return p 146 } 147 148 func realcompile(s []rune) { 149 startlex(s) 150 atorp = 0 151 andp = 0 152 subidp = 0 153 cursubid = 0 154 lastwasand = false 155 /* Start with a low priority operator to prime parser */ 156 pushator(START - 1) 157 for { 158 token := lex() 159 if token == END { 160 break 161 } 162 if token&ISATOR == OPERATOR { 163 operator(int(token)) 164 } else { 165 operand(token) 166 } 167 } 168 /* Close with a low priority operator */ 169 evaluntil(START) 170 /* Force END */ 171 operand(END) 172 evaluntil(START) 173 if nbra != 0 { 174 regerror("unmatched `('") 175 } 176 andp-- /* points to first and only operand */ 177 rechan <- andstack[andp].first 178 } 179 180 func Compile(r []rune) bool { 181 if runesEqual(lastregexp, r) { 182 return true 183 } 184 lastregexp = lastregexp[:0] 185 for _, c := range class { 186 // free(c) 187 _ = c 188 } 189 class = class[:0] 190 progp = 0 191 backwards = false 192 bstartinst = nil 193 go realcompile(r) 194 startinst = <-rechan 195 if startinst == nil { 196 return false 197 } 198 optimize(0) 199 oprogp := progp 200 backwards = true 201 go realcompile(r) 202 bstartinst = <-rechan 203 if bstartinst == nil { 204 return false 205 } 206 optimize(oprogp) 207 lastregexp = append(lastregexp[:0], r...) 208 return true 209 } 210 211 func operand(t rune) { 212 if lastwasand { 213 operator(CAT) /* catenate is implicit */ 214 } 215 i := newinst(t) 216 if t == CCLASS { 217 if negateclass { 218 i.typ = NCCLASS /* UGH */ 219 } 220 i.rclass = len(class) - 1 /* UGH */ 221 } 222 pushand(i, i) 223 lastwasand = true 224 } 225 226 func operator(t int) { 227 if t == RBRA { 228 nbra-- 229 if nbra < 0 { 230 regerror("unmatched `)'") 231 } 232 } 233 if t == LBRA { 234 /* 235 * if(++cursubid >= NSUBEXP) 236 * regerror(Esubexp); 237 */ 238 cursubid++ /* silently ignored */ 239 nbra++ 240 if lastwasand { 241 operator(CAT) 242 } 243 } else { 244 evaluntil(t) 245 } 246 if t != RBRA { 247 pushator(t) 248 } 249 lastwasand = false 250 if t == STAR || t == QUEST || t == PLUS || t == RBRA { 251 lastwasand = true /* these look like operands */ 252 } 253 } 254 255 func pushand(f *Inst, l *Inst) { 256 if andp >= len(andstack) { 257 util.Fatal("operand stack overflow") 258 } 259 a := &andstack[andp] 260 andp++ 261 a.first = f 262 a.last = l 263 } 264 265 func pushator(t int) { 266 if atorp >= NSTACK { 267 util.Fatal("operator stack overflow") 268 } 269 atorstack[atorp] = t 270 atorp++ 271 if cursubid >= NRange { 272 subidstack[subidp] = -1 273 subidp++ 274 } else { 275 subidstack[subidp] = cursubid 276 subidp++ 277 } 278 } 279 280 func popand(op int) *Node { 281 if andp <= 0 { 282 if op != 0 { 283 regerror(fmt.Sprintf("missing operand for %c", op)) 284 } else { 285 regerror("malformed regexp") 286 } 287 } 288 andp-- 289 return &andstack[andp] 290 } 291 292 func popator() int { 293 if atorp <= 0 { 294 util.Fatal("operator stack underflow") 295 } 296 subidp-- 297 atorp-- 298 return atorstack[atorp] 299 } 300 301 func evaluntil(pri int) { 302 for pri == RBRA || atorstack[atorp-1] >= pri { 303 var inst2 *Inst 304 var inst1 *Inst 305 var t *Node 306 var op2 *Node 307 var op1 *Node 308 switch popator() { 309 case LBRA: 310 op1 = popand('(') 311 inst2 = newinst(RBRA) 312 inst2.subid = subidstack[subidp] 313 op1.last.next = inst2 314 inst1 = newinst(LBRA) 315 inst1.subid = subidstack[subidp] 316 inst1.next = op1.first 317 pushand(inst1, inst2) 318 return /* must have been RBRA */ 319 default: 320 util.Fatal("unknown regexp operator") 321 case OR: 322 op2 = popand('|') 323 op1 = popand('|') 324 inst2 = newinst(NOP) 325 op2.last.next = inst2 326 op1.last.next = inst2 327 inst1 = newinst(OR) 328 inst1.right = op1.first 329 inst1.next = op2.first 330 pushand(inst1, inst2) 331 case CAT: 332 op2 = popand(0) 333 op1 = popand(0) 334 if backwards && op2.first.typ != END { 335 t = op1 336 op1 = op2 337 op2 = t 338 } 339 op1.last.next = op2.first 340 pushand(op1.first, op2.last) 341 case STAR: 342 op2 = popand('*') 343 inst1 = newinst(OR) 344 op2.last.next = inst1 345 inst1.right = op2.first 346 pushand(inst1, inst1) 347 case PLUS: 348 op2 = popand('+') 349 inst1 = newinst(OR) 350 op2.last.next = inst1 351 inst1.right = op2.first 352 pushand(op2.first, inst1) 353 case QUEST: 354 op2 = popand('?') 355 inst1 = newinst(OR) 356 inst2 = newinst(NOP) 357 inst1.next = inst2 358 inst1.right = op2.first 359 op2.last.next = inst2 360 pushand(inst1, inst2) 361 } 362 } 363 } 364 365 func optimize(start int) { 366 for i := start; program[i].typ != END; i++ { 367 inst := &program[i] 368 target := inst.next 369 for target.typ == NOP { 370 target = target.next 371 } 372 inst.next = target 373 } 374 } 375 376 func startlex(s []rune) { 377 exprp = s 378 nbra = 0 379 } 380 381 func lex() rune { 382 if len(exprp) == 0 { 383 return END 384 } 385 386 c := exprp[0] 387 exprp = exprp[1:] 388 switch c { 389 case '\\': 390 if len(exprp) > 0 { 391 c = exprp[0] 392 exprp = exprp[1:] 393 if c == 'n' { 394 c = '\n' 395 } 396 } 397 case '*': 398 c = STAR 399 case '?': 400 c = QUEST 401 case '+': 402 c = PLUS 403 case '|': 404 c = OR 405 case '.': 406 c = ANY 407 case '(': 408 c = LBRA 409 case ')': 410 c = RBRA 411 case '^': 412 c = BOL 413 case '$': 414 c = EOL 415 case '[': 416 c = CCLASS 417 bldcclass() 418 } 419 return c 420 } 421 422 func nextrec() rune { 423 if len(exprp) == 0 || (len(exprp) == 1 && exprp[0] == '\\') { 424 regerror("malformed `[]'") 425 } 426 if exprp[0] == '\\' { 427 exprp = exprp[1:] 428 if exprp[0] == 'n' { 429 exprp = exprp[1:] 430 return '\n' 431 } 432 c := exprp[0] 433 exprp = exprp[1:] 434 return c | QUOTED 435 } 436 c := exprp[0] 437 exprp = exprp[1:] 438 return c 439 } 440 441 func bldcclass() { 442 var classp []rune 443 /* we have already seen the '[' */ 444 if exprp[0] == '^' { /* don't match newline in negate case */ 445 classp = append(classp, '\n') 446 negateclass = true 447 exprp = exprp[1:] 448 } else { 449 negateclass = false 450 } 451 for { 452 c1 := nextrec() 453 if c1 == ']' { 454 break 455 } 456 if c1 == '-' { 457 regerror("malformed `[]'") 458 } 459 if exprp[0] == '-' { 460 exprp = exprp[1:] /* eat '-' */ 461 c2 := nextrec() 462 if c2 == ']' { 463 regerror("malformed `[]'") 464 } 465 classp = append(classp, utf8.MaxRune, c1, c2) 466 } else { 467 classp = append(classp, c1&^QUOTED) 468 } 469 } 470 class = append(class, classp) 471 } 472 473 func classmatch(classno int, c rune, negate bool) bool { 474 p := class[classno] 475 for len(p) > 0 { 476 if p[0] == utf8.MaxRune { 477 if p[1] <= c && c <= p[2] { 478 return !negate 479 } 480 p = p[3:] 481 } else { 482 r := p[0] 483 p = p[1:] 484 if r == c { 485 return !negate 486 } 487 } 488 } 489 return negate 490 } 491 492 /* 493 * Note optimization in addinst: 494 * *l must be pending when addinst called; if *l has been looked 495 * at already, the optimization is a bug. 496 */ 497 func addinst(l []Ilist, inst *Inst, sep *Ranges) int { 498 i := 0 499 p := &l[i] 500 for p.inst != nil { 501 if p.inst == inst { 502 if sep.R[0].Pos < p.se.R[0].End { 503 p.se = *sep /* this would be bug */ 504 } 505 return 0 /* It's already there */ 506 } 507 i++ 508 p = &l[i] 509 } 510 p.inst = inst 511 p.se = *sep 512 l[i+1].inst = nil 513 return 1 514 } 515 516 func Null() bool { 517 return startinst == nil || bstartinst == nil 518 } 519 520 /* either t!=nil or r!=nil, and we match the string in the appropriate place */ 521 func Match(t runes.Text, r []rune, startp int, eof int, rp *Ranges) bool { 522 flag := 0 523 p := startp 524 startchar := rune(0) 525 wrapped := 0 526 nnl := 0 527 if startinst.typ < OPERATOR { 528 startchar = startinst.typ 529 } 530 list[1][0].inst = nil 531 list[0][0].inst = list[1][0].inst 532 Sel.R[0].Pos = -1 533 var nc int 534 if t != nil { 535 nc = t.Len() 536 } else { 537 nc = len(r) 538 } 539 /* Execute machine once for each character */ 540 for ; ; p++ { 541 doloop: 542 var c rune 543 if p >= eof || p >= nc { 544 tmp22 := wrapped 545 wrapped++ 546 switch tmp22 { 547 case 0, /* let loop run one more click */ 548 2: 549 break 550 case 1: /* expired; wrap to beginning */ 551 if Sel.R[0].Pos >= 0 || eof != runes.Infinity { 552 goto Return 553 } 554 list[1][0].inst = nil 555 list[0][0].inst = list[1][0].inst 556 p = 0 557 goto doloop 558 default: 559 goto Return 560 } 561 c = 0 562 } else { 563 if ((wrapped != 0 && p >= startp) || Sel.R[0].Pos > 0) && nnl == 0 { 564 break 565 } 566 if t != nil { 567 c = t.RuneAt(p) 568 } else { 569 c = r[p] 570 } 571 } 572 /* fast check for first char */ 573 if startchar != 0 && nnl == 0 && c != startchar { 574 continue 575 } 576 tl = list[flag][:] 577 flag ^= 1 578 nl = list[flag][:] 579 nl[0].inst = nil 580 ntl := nnl 581 nnl = 0 582 if Sel.R[0].Pos < 0 && (wrapped == 0 || p < startp || startp == eof) { 583 /* Add first instruction to this list */ 584 sempty.R[0].Pos = p 585 if addinst(tl, startinst, &sempty) != 0 { 586 ntl++ 587 if ntl >= NLIST { 588 goto Overflow 589 } 590 } 591 } 592 /* Execute machine until this list is empty */ 593 for tlp := 0; ; tlp++ { 594 inst := tl[tlp].inst 595 if inst == nil { 596 break 597 } /* assignment = */ 598 Switchstmt: 599 switch inst.typ { 600 default: /* regular character */ 601 if inst.typ == c { 602 goto Addinst 603 } 604 case LBRA: 605 if inst.subid >= 0 { 606 tl[tlp].se.R[inst.subid].Pos = p 607 } 608 inst = inst.next 609 goto Switchstmt 610 case RBRA: 611 if inst.subid >= 0 { 612 tl[tlp].se.R[inst.subid].End = p 613 } 614 inst = inst.next 615 goto Switchstmt 616 case ANY: 617 if c != '\n' { 618 goto Addinst 619 } 620 case BOL: 621 if p == 0 || (t != nil && t.RuneAt(p-1) == '\n') || (r != nil && r[p-1] == '\n') { 622 inst = inst.next 623 goto Switchstmt 624 } 625 case EOL: 626 if c == '\n' { 627 inst = inst.next 628 goto Switchstmt 629 } 630 case CCLASS: 631 if c >= 0 && classmatch(inst.rclass, c, false) { 632 goto Addinst 633 } 634 case NCCLASS: 635 if c >= 0 && classmatch(inst.rclass, c, true) { 636 goto Addinst 637 } 638 /* evaluate right choice later */ 639 case OR: 640 if addinst(tl, inst.right, &tl[tlp].se) != 0 { 641 ntl++ 642 if ntl >= NLIST { 643 goto Overflow 644 } 645 } 646 /* efficiency: advance and re-evaluate */ 647 inst = inst.next 648 goto Switchstmt 649 case END: /* Match! */ 650 tl[tlp].se.R[0].End = p 651 newmatch(&tl[tlp].se) 652 } 653 continue 654 655 Addinst: 656 if addinst(nl, inst.next, &tl[tlp].se) != 0 { 657 nnl++ 658 if nnl >= NLIST { 659 goto Overflow 660 } 661 } 662 663 } 664 } 665 Return: 666 *rp = Sel 667 return Sel.R[0].Pos >= 0 668 669 Overflow: 670 alog.Printf("regexp list overflow\n") 671 Sel.R[0].Pos = -1 672 goto Return 673 } 674 675 func newmatch(sp *Ranges) { 676 if Sel.R[0].Pos < 0 || sp.R[0].Pos < Sel.R[0].Pos || (sp.R[0].Pos == Sel.R[0].Pos && sp.R[0].End > Sel.R[0].End) { 677 Sel = *sp 678 } 679 } 680 681 func MatchBackward(t runes.Text, startp int, rp *Ranges) bool { 682 flag := 0 683 nnl := 0 684 wrapped := 0 685 p := startp 686 startchar := rune(0) 687 if bstartinst.typ < OPERATOR { 688 startchar = bstartinst.typ 689 } 690 list[1][0].inst = nil 691 list[0][0].inst = list[1][0].inst 692 Sel.R[0].Pos = -1 693 /* Execute machine once for each character, including terminal NUL */ 694 for ; ; p-- { 695 doloop: 696 var c rune 697 if p <= 0 { 698 tmp23 := wrapped 699 wrapped++ 700 switch tmp23 { 701 case 0, /* let loop run one more click */ 702 2: 703 break 704 case 1: /* expired; wrap to end */ 705 if Sel.R[0].Pos >= 0 { 706 goto Return 707 } 708 list[1][0].inst = nil 709 list[0][0].inst = list[1][0].inst 710 p = t.Len() 711 goto doloop 712 case 3: 713 fallthrough 714 default: 715 goto Return 716 } 717 c = 0 718 } else { 719 if ((wrapped != 0 && p <= startp) || Sel.R[0].Pos > 0) && nnl == 0 { 720 break 721 } 722 c = t.RuneAt(p - 1) 723 } 724 /* fast check for first char */ 725 if startchar != 0 && nnl == 0 && c != startchar { 726 continue 727 } 728 tl = list[flag][:] 729 flag ^= 1 730 nl = list[flag][:] 731 nl[0].inst = nil 732 ntl := nnl 733 nnl = 0 734 if Sel.R[0].Pos < 0 && (wrapped == 0 || p > startp) { 735 /* Add first instruction to this list */ 736 /* the minus is so the optimizations in addinst work */ 737 sempty.R[0].Pos = -p 738 if addinst(tl, bstartinst, &sempty) != 0 { 739 ntl++ 740 if ntl >= NLIST { 741 goto Overflow 742 } 743 } 744 } 745 /* Execute machine until this list is empty */ 746 for tlp := 0; ; tlp++ { 747 inst := tl[tlp].inst 748 if inst == nil { 749 break 750 } /* assignment = */ 751 Switchstmt: 752 switch inst.typ { 753 default: /* regular character */ 754 if inst.typ == c { 755 goto Addinst 756 } 757 case LBRA: 758 if inst.subid >= 0 { 759 tl[tlp].se.R[inst.subid].Pos = p 760 } 761 inst = inst.next 762 goto Switchstmt 763 case RBRA: 764 if inst.subid >= 0 { 765 tl[tlp].se.R[inst.subid].End = p 766 } 767 inst = inst.next 768 goto Switchstmt 769 case ANY: 770 if c != '\n' { 771 goto Addinst 772 } 773 case BOL: 774 if c == '\n' || p == 0 { 775 inst = inst.next 776 goto Switchstmt 777 } 778 case EOL: 779 if p < t.Len() && t.RuneAt(p) == '\n' { 780 inst = inst.next 781 goto Switchstmt 782 } 783 case CCLASS: 784 if c > 0 && classmatch(inst.rclass, c, false) { 785 goto Addinst 786 } 787 case NCCLASS: 788 if c > 0 && classmatch(inst.rclass, c, true) { 789 goto Addinst 790 } 791 /* evaluate right choice later */ 792 case OR: 793 if addinst(tl, inst.right, &tl[tlp].se) != 0 { 794 ntl++ 795 if ntl >= NLIST { 796 goto Overflow 797 } 798 } 799 /* efficiency: advance and re-evaluate */ 800 inst = inst.next 801 goto Switchstmt 802 case END: /* Match! */ 803 tl[tlp].se.R[0].Pos = -tl[tlp].se.R[0].Pos /* minus sign */ 804 tl[tlp].se.R[0].End = p 805 bnewmatch(&tl[tlp].se) 806 } 807 continue 808 809 Addinst: 810 if addinst(nl, inst.next, &tl[tlp].se) != 0 { 811 nnl++ 812 if nnl >= NLIST { 813 goto Overflow 814 } 815 } 816 817 } 818 } 819 Return: 820 *rp = Sel 821 return Sel.R[0].Pos >= 0 822 823 Overflow: 824 alog.Printf("regexp list overflow\n") 825 Sel.R[0].Pos = -1 826 goto Return 827 } 828 829 func bnewmatch(sp *Ranges) { 830 if Sel.R[0].Pos < 0 || sp.R[0].Pos > Sel.R[0].End || (sp.R[0].Pos == Sel.R[0].End && sp.R[0].End < Sel.R[0].Pos) { 831 for i := 0; i < NRange; i++ { /* note the reversal; q0<=q1 */ 832 Sel.R[i].Pos = sp.R[i].End 833 Sel.R[i].End = sp.R[i].Pos 834 } 835 } 836 } 837 838 func runesEqual(x, y []rune) bool { 839 if len(x) != len(y) { 840 return false 841 } 842 for i := range x { 843 if x[i] != y[i] { 844 return false 845 } 846 } 847 return true 848 } 849 850 const NRange = 10 851 852 type Ranges struct { 853 R [NRange]runes.Range 854 } 855 856 var Sel Ranges