github.com/blynn/nex@v0.0.0-20210330102341-1a3320dab988/nex.go (about) 1 // Substantial copy-and-paste from src/pkg/regexp. 2 package main 3 4 import ( 5 "bufio" 6 "errors" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "log" 11 "os" 12 "sort" 13 "strconv" 14 "strings" 15 ) 16 import ( 17 "go/format" 18 "go/parser" 19 "go/printer" 20 "go/token" 21 ) 22 23 type rule struct { 24 regex []rune 25 code string 26 startCode string 27 endCode string 28 kid []*rule 29 id string 30 } 31 32 var ( 33 ErrInternal = errors.New("internal error") 34 ErrUnmatchedLpar = errors.New("unmatched '('") 35 ErrUnmatchedRpar = errors.New("unmatched ')'") 36 ErrUnmatchedLbkt = errors.New("unmatched '['") 37 ErrUnmatchedRbkt = errors.New("unmatched ']'") 38 ErrBadRange = errors.New("bad range in character class") 39 ErrExtraneousBackslash = errors.New("extraneous backslash") 40 ErrBareClosure = errors.New("closure applies to nothing") 41 ErrBadBackslash = errors.New("illegal backslash escape") 42 ErrExpectedLBrace = errors.New("expected '{'") 43 ErrUnmatchedLBrace = errors.New("unmatched '{'") 44 ErrUnexpectedEOF = errors.New("unexpected EOF") 45 ErrUnexpectedNewline = errors.New("unexpected newline") 46 ErrUnexpectedLAngle = errors.New("unexpected '<'") 47 ErrUnmatchedLAngle = errors.New("unmatched '<'") 48 ErrUnmatchedRAngle = errors.New("unmatched '>'") 49 ) 50 51 func ispunct(c rune) bool { 52 for _, r := range "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" { 53 if c == r { 54 return true 55 } 56 } 57 return false 58 } 59 60 var escapes = []rune("abfnrtv") 61 var escaped = []rune("\a\b\f\n\r\t\v") 62 63 func escape(c rune) rune { 64 for i, b := range escapes { 65 if b == c { 66 return escaped[i] 67 } 68 } 69 return -1 70 } 71 72 const ( 73 kNil = iota 74 kRune 75 kClass 76 kWild 77 kStart 78 kEnd 79 ) 80 81 type edge struct { 82 kind int // Rune/Class/Wild/Nil. 83 r rune // Rune for rune edges. 84 lim []rune // Pairs of limits for character class edges. 85 negate bool // True if the character class is negated. 86 dst *node // Destination node. 87 } 88 type node struct { 89 e edges // Outedges. 90 n int // Index number. Scoped to a family. 91 accept bool // True if this is an accepting state. 92 set []int // The NFA nodes represented by a DFA node. 93 } 94 95 type edges []*edge 96 97 func (e edges) Len() int { 98 return len(e) 99 } 100 func (e edges) Less(i, j int) bool { 101 return e[i].r < e[j].r 102 } 103 104 func (e edges) Swap(i, j int) { 105 e[i], e[j] = e[j], e[i] 106 } 107 108 type RuneSlice []rune 109 110 func (p RuneSlice) Len() int { return len(p) } 111 func (p RuneSlice) Less(i, j int) bool { return p[i] < p[j] } 112 func (p RuneSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 113 114 // Print a graph in DOT format given the start node. 115 // 116 // $ dot -Tps input.dot -o output.ps 117 func writeDotGraph(outf *os.File, start *node, id string) { 118 done := make(map[*node]bool) 119 var show func(*node) 120 show = func(u *node) { 121 if u.accept { 122 fmt.Fprintf(outf, " %v[style=filled,color=green];\n", u.n) 123 } 124 done[u] = true 125 for _, e := range u.e { 126 // We use -1 to denote the dead end node in DFAs. 127 if e.dst.n == -1 { 128 continue 129 } 130 label := "" 131 runeToDot := func(r rune) string { 132 if strconv.IsPrint(r) { 133 return fmt.Sprintf("%v", string(r)) 134 } 135 return fmt.Sprintf("U+%X", int(r)) 136 } 137 switch e.kind { 138 case kRune: 139 label = fmt.Sprintf("[label=%q]", runeToDot(e.r)) 140 case kWild: 141 label = "[color=blue]" 142 case kClass: 143 label = "[label=\"[" 144 if e.negate { 145 label += "^" 146 } 147 for i := 0; i < len(e.lim); i += 2 { 148 label += runeToDot(e.lim[i]) 149 if e.lim[i] != e.lim[i+1] { 150 label += "-" + runeToDot(e.lim[i+1]) 151 } 152 } 153 label += "]\"]" 154 } 155 fmt.Fprintf(outf, " %v -> %v%v;\n", u.n, e.dst.n, label) 156 } 157 for _, e := range u.e { 158 if !done[e.dst] { 159 show(e.dst) 160 } 161 } 162 } 163 fmt.Fprintf(outf, "digraph %v {\n 0[shape=box];\n", id) 164 show(start) 165 fmt.Fprintln(outf, "}") 166 } 167 168 func inClass(r rune, lim []rune) bool { 169 for i := 0; i < len(lim); i += 2 { 170 if lim[i] <= r && r <= lim[i+1] { 171 return true 172 } 173 } 174 return false 175 } 176 177 var dfadot, nfadot *os.File 178 179 func gen(out *bufio.Writer, x *rule) { 180 s := x.regex 181 // Regex -> NFA 182 // We cannot have our alphabet be all Unicode characters. Instead, 183 // we compute an alphabet for each regex: 184 // 185 // 1. Singles: we add single runes used in the regex: any rune not in a 186 // range. These are held in `sing`. 187 // 188 // 2. Ranges: entire ranges become elements of the alphabet. If ranges in 189 // the same expression overlap, we break them up into non-overlapping 190 // ranges. The generated code checks singles before ranges, so there's no 191 // need to break up a range if it contains a single. These are maintained 192 // in sorted order in `lim`. 193 // 194 // 3. Wild: we add an element representing all other runes. 195 // 196 // e.g. the alphabet of /[0-9]*[Ee][2-5]*/ is sing: { E, e }, 197 // lim: { [0-1], [2-5], [6-9] } and the wild element. 198 sing := make(map[rune]bool) 199 var lim []rune 200 var insertLimits func(l, r rune) 201 // Insert a new range [l-r] into `lim`, breaking it up if it overlaps, and 202 // discarding it if it coincides with an existing range. We keep `lim` 203 // sorted. 204 insertLimits = func(l, r rune) { 205 var i int 206 for i = 0; i < len(lim); i += 2 { 207 if l <= lim[i+1] { 208 break 209 } 210 } 211 if len(lim) == i || r < lim[i] { 212 lim = append(lim, 0, 0) 213 copy(lim[i+2:], lim[i:]) 214 lim[i] = l 215 lim[i+1] = r 216 return 217 } 218 if l < lim[i] { 219 lim = append(lim, 0, 0) 220 copy(lim[i+2:], lim[i:]) 221 lim[i+1] = lim[i] - 1 222 lim[i] = l 223 insertLimits(lim[i], r) 224 return 225 } 226 if l > lim[i] { 227 lim = append(lim, 0, 0) 228 copy(lim[i+2:], lim[i:]) 229 lim[i+1] = l - 1 230 lim[i+2] = l 231 insertLimits(l, r) 232 return 233 } 234 // l == lim[i] 235 if r == lim[i+1] { 236 return 237 } 238 if r < lim[i+1] { 239 lim = append(lim, 0, 0) 240 copy(lim[i+2:], lim[i:]) 241 lim[i] = l 242 lim[i+1] = r 243 lim[i+2] = r + 1 244 return 245 } 246 insertLimits(lim[i+1]+1, r) 247 } 248 pos := 0 249 n := 0 250 newNode := func() *node { 251 res := new(node) 252 res.n = n 253 n++ 254 return res 255 } 256 newEdge := func(u, v *node) *edge { 257 res := new(edge) 258 res.dst = v 259 u.e = append(u.e, res) 260 sort.Sort(u.e) 261 return res 262 } 263 newStartEdge := func(u, v *node) *edge { 264 res := newEdge(u, v) 265 res.kind = kStart 266 return res 267 } 268 newEndEdge := func(u, v *node) *edge { 269 res := newEdge(u, v) 270 res.kind = kEnd 271 return res 272 } 273 newWildEdge := func(u, v *node) *edge { 274 res := newEdge(u, v) 275 res.kind = kWild 276 return res 277 } 278 newRuneEdge := func(u, v *node, r rune) *edge { 279 res := newEdge(u, v) 280 res.kind = kRune 281 res.r = r 282 sing[r] = true 283 return res 284 } 285 newNilEdge := func(u, v *node) *edge { 286 res := newEdge(u, v) 287 res.kind = kNil 288 return res 289 } 290 newClassEdge := func(u, v *node) *edge { 291 res := newEdge(u, v) 292 res.kind = kClass 293 res.lim = make([]rune, 0, 2) 294 return res 295 } 296 maybeEscape := func() rune { 297 c := s[pos] 298 if '\\' == c { 299 pos++ 300 if len(s) == pos { 301 panic(ErrExtraneousBackslash) 302 } 303 c = s[pos] 304 switch { 305 case ispunct(c): 306 case escape(c) >= 0: 307 c = escape(s[pos]) 308 default: 309 panic(ErrBadBackslash) 310 } 311 } 312 return c 313 } 314 pcharclass := func() (start, end *node) { 315 start, end = newNode(), newNode() 316 e := newClassEdge(start, end) 317 // Ranges consisting of a single element are a special case: 318 singletonRange := func(c rune) { 319 // 1. The edge-specific 'lim' field always expects endpoints in pairs, 320 // so we must give 'c' as the beginning and the end of the range. 321 e.lim = append(e.lim, c, c) 322 // 2. Instead of updating the regex-wide 'lim' interval set, we add a singleton. 323 sing[c] = true 324 } 325 if len(s) > pos && '^' == s[pos] { 326 e.negate = true 327 pos++ 328 } 329 var left rune 330 leftLive := false 331 justSawDash := false 332 first := true 333 // Allow '-' at the beginning and end, and in ranges. 334 for pos < len(s) && s[pos] != ']' { 335 switch c := maybeEscape(); c { 336 case '-': 337 if first { 338 singletonRange('-') 339 break 340 } 341 justSawDash = true 342 default: 343 if justSawDash { 344 if !leftLive || left > c { 345 panic(ErrBadRange) 346 } 347 e.lim = append(e.lim, left, c) 348 if left == c { 349 sing[c] = true 350 } else { 351 insertLimits(left, c) 352 } 353 leftLive = false 354 } else { 355 if leftLive { 356 singletonRange(left) 357 } 358 left = c 359 leftLive = true 360 } 361 justSawDash = false 362 } 363 first = false 364 pos++ 365 } 366 if leftLive { 367 singletonRange(left) 368 } 369 if justSawDash { 370 singletonRange('-') 371 } 372 return 373 } 374 isNested := false 375 var pre func() (start, end *node) 376 pterm := func() (start, end *node) { 377 if len(s) == pos || s[pos] == '|' { 378 end = newNode() 379 start = end 380 return 381 } 382 switch s[pos] { 383 case '*', '+', '?': 384 panic(ErrBareClosure) 385 case ')': 386 if !isNested { 387 panic(ErrUnmatchedRpar) 388 } 389 end = newNode() 390 start = end 391 return 392 case '(': 393 pos++ 394 oldIsNested := isNested 395 isNested = true 396 start, end = pre() 397 isNested = oldIsNested 398 if len(s) == pos || ')' != s[pos] { 399 panic(ErrUnmatchedLpar) 400 } 401 case '.': 402 start, end = newNode(), newNode() 403 newWildEdge(start, end) 404 case '^': 405 start, end = newNode(), newNode() 406 newStartEdge(start, end) 407 case '$': 408 start, end = newNode(), newNode() 409 newEndEdge(start, end) 410 case ']': 411 panic(ErrUnmatchedRbkt) 412 case '[': 413 pos++ 414 start, end = pcharclass() 415 if len(s) == pos || ']' != s[pos] { 416 panic(ErrUnmatchedLbkt) 417 } 418 default: 419 start, end = newNode(), newNode() 420 newRuneEdge(start, end, maybeEscape()) 421 } 422 pos++ 423 return 424 } 425 pclosure := func() (start, end *node) { 426 start, end = pterm() 427 if start == end { 428 return 429 } 430 if len(s) == pos { 431 return 432 } 433 switch s[pos] { 434 case '*': 435 newNilEdge(end, start) 436 nend := newNode() 437 newNilEdge(end, nend) 438 start, end = end, nend 439 case '+': 440 newNilEdge(end, start) 441 nend := newNode() 442 newNilEdge(end, nend) 443 end = nend 444 case '?': 445 nstart := newNode() 446 newNilEdge(nstart, start) 447 start = nstart 448 newNilEdge(start, end) 449 default: 450 return 451 } 452 pos++ 453 return 454 } 455 pcat := func() (start, end *node) { 456 for { 457 nstart, nend := pclosure() 458 if start == nil { 459 start, end = nstart, nend 460 } else if nstart != nend { 461 end.e = make([]*edge, len(nstart.e)) 462 copy(end.e, nstart.e) 463 end = nend 464 } 465 if nstart == nend { 466 return 467 } 468 } 469 panic("unreachable") 470 } 471 pre = func() (start, end *node) { 472 start, end = pcat() 473 for pos < len(s) && s[pos] != ')' { 474 if s[pos] != '|' { 475 panic(ErrInternal) 476 } 477 pos++ 478 nstart, nend := pcat() 479 tmp := newNode() 480 newNilEdge(tmp, start) 481 newNilEdge(tmp, nstart) 482 start = tmp 483 tmp = newNode() 484 newNilEdge(end, tmp) 485 newNilEdge(nend, tmp) 486 end = tmp 487 } 488 return 489 } 490 start, end := pre() 491 end.accept = true 492 493 // Compute shortlist of nodes (reachable nodes), as we may have discarded 494 // nodes left over from parsing. Also, make short[0] the start node. 495 short := make([]*node, 0, n) 496 { 497 var visit func(*node) 498 mark := make([]bool, n) 499 newn := make([]int, n) 500 visit = func(u *node) { 501 mark[u.n] = true 502 newn[u.n] = len(short) 503 short = append(short, u) 504 for _, e := range u.e { 505 if !mark[e.dst.n] { 506 visit(e.dst) 507 } 508 } 509 } 510 visit(start) 511 for _, v := range short { 512 v.n = newn[v.n] 513 } 514 } 515 n = len(short) 516 517 if nfadot != nil { 518 writeDotGraph(nfadot, start, "NFA_"+x.id) 519 } 520 521 // NFA -> DFA 522 nilClose := func(st []bool) { 523 visited := make([]bool, n) 524 var do func(int) 525 do = func(i int) { 526 visited[i] = true 527 v := short[i] 528 for _, e := range v.e { 529 if e.kind == kNil && !visited[e.dst.n] { 530 st[e.dst.n] = true 531 do(e.dst.n) 532 } 533 } 534 } 535 for i := 0; i < n; i++ { 536 if st[i] && !visited[i] { 537 do(i) 538 } 539 } 540 } 541 var todo []*node 542 tab := make(map[string]*node) 543 var buf []byte 544 dfacount := 0 545 { // Construct the node of no return. 546 for i := 0; i < n; i++ { 547 buf = append(buf, '0') 548 } 549 tmp := new(node) 550 tmp.n = -1 551 tab[string(buf)] = tmp 552 } 553 newDFANode := func(st []bool) (res *node, found bool) { 554 buf = nil 555 accept := false 556 for i, v := range st { 557 if v { 558 buf = append(buf, '1') 559 accept = accept || short[i].accept 560 } else { 561 buf = append(buf, '0') 562 } 563 } 564 res, found = tab[string(buf)] 565 if !found { 566 res = new(node) 567 res.n = dfacount 568 res.accept = accept 569 dfacount++ 570 for i, v := range st { 571 if v { 572 res.set = append(res.set, i) 573 } 574 } 575 tab[string(buf)] = res 576 } 577 return res, found 578 } 579 580 get := func(states []bool) *node { 581 nilClose(states) 582 node, old := newDFANode(states) 583 if !old { 584 todo = append(todo, node) 585 } 586 return node 587 } 588 getcb := func(v *node, cb func(*edge) bool) *node { 589 states := make([]bool, n) 590 for _, i := range v.set { 591 for _, e := range short[i].e { 592 if cb(e) { 593 states[e.dst.n] = true 594 } 595 } 596 } 597 return get(states) 598 } 599 states := make([]bool, n) 600 // The DFA start state is the state representing the nil-closure of the start 601 // node in the NFA. Recall it has index 0. 602 states[0] = true 603 dfastart := get(states) 604 for len(todo) > 0 { 605 v := todo[len(todo)-1] 606 todo = todo[0 : len(todo)-1] 607 // Singles. 608 var runes []rune 609 for r, _ := range sing { 610 runes = append(runes, r) 611 } 612 sort.Sort(RuneSlice(runes)) 613 for _, r := range runes { 614 newRuneEdge(v, getcb(v, func(e *edge) bool { 615 return e.kind == kRune && e.r == r || 616 e.kind == kWild || 617 e.kind == kClass && e.negate != inClass(r, e.lim) 618 }), r) 619 } 620 // Character ranges. 621 for j := 0; j < len(lim); j += 2 { 622 e := newClassEdge(v, getcb(v, func(e *edge) bool { 623 return e.kind == kWild || 624 e.kind == kClass && e.negate != inClass(lim[j], e.lim) 625 })) 626 627 e.lim = append(e.lim, lim[j], lim[j+1]) 628 } 629 // Wild. 630 newWildEdge(v, getcb(v, func(e *edge) bool { 631 return e.kind == kWild || (e.kind == kClass && e.negate) 632 })) 633 // ^ and $. 634 newStartEdge(v, getcb(v, func(e *edge) bool { return e.kind == kStart })) 635 newEndEdge(v, getcb(v, func(e *edge) bool { return e.kind == kEnd })) 636 } 637 n = dfacount 638 639 if dfadot != nil { 640 writeDotGraph(dfadot, dfastart, "DFA_"+x.id) 641 } 642 // DFA -> Go 643 sorted := make([]*node, n) 644 for _, v := range tab { 645 if -1 != v.n { 646 sorted[v.n] = v 647 } 648 } 649 650 fmt.Fprintf(out, "\n// %v\n", string(x.regex)) 651 for i, v := range sorted { 652 if i == 0 { 653 out.WriteString("{[]bool{") 654 } else { 655 out.WriteString(", ") 656 } 657 if v.accept { 658 out.WriteString("true") 659 } else { 660 out.WriteString("false") 661 } 662 } 663 out.WriteString("}, []func(rune) int{ // Transitions\n") 664 for _, v := range sorted { 665 out.WriteString("func(r rune) int {\n") 666 var runeCases, classCases string 667 var wildDest int 668 for _, e := range v.e { 669 m := e.dst.n 670 switch e.kind { 671 case kRune: 672 runeCases += fmt.Sprintf("\t\tcase %d: return %d\n", e.r, m) 673 case kClass: 674 classCases += fmt.Sprintf("\t\tcase %d <= r && r <= %d: return %d\n", 675 e.lim[0], e.lim[1], m) 676 case kWild: 677 wildDest = m 678 } 679 } 680 if runeCases != "" { 681 out.WriteString("\tswitch(r) {\n" + runeCases + "\t}\n") 682 } 683 if classCases != "" { 684 out.WriteString("\tswitch {\n" + classCases + "\t}\n") 685 } 686 fmt.Fprintf(out, "\treturn %v\n},\n", wildDest) 687 } 688 out.WriteString("}, []int{ /* Start-of-input transitions */ ") 689 for _, v := range sorted { 690 s := " -1," 691 for _, e := range v.e { 692 if e.kind == kStart { 693 s = fmt.Sprintf(" %d,", e.dst.n) 694 break 695 } 696 } 697 out.WriteString(s) 698 } 699 out.WriteString("}, []int{ /* End-of-input transitions */ ") 700 for _, v := range sorted { 701 s := " -1," 702 for _, e := range v.e { 703 if e.kind == kEnd { 704 s = fmt.Sprintf(" %d,", e.dst.n) 705 break 706 } 707 } 708 out.WriteString(s) 709 } 710 out.WriteString("},") 711 if len(x.kid) == 0 { 712 out.WriteString("nil") 713 } else { 714 out.WriteString("[]dfa{") 715 for _, kid := range x.kid { 716 gen(out, kid) 717 } 718 out.WriteString("}") 719 } 720 out.WriteString("},\n") 721 } 722 723 func writeFamily(out *bufio.Writer, node *rule, lvl int) { 724 tab := func() { 725 for i := 0; i <= lvl; i++ { 726 out.WriteByte('\t') 727 } 728 } 729 if node.startCode != "" { 730 tab() 731 prefixReplacer.WriteString(out, "if !yylex.stale {\n") 732 tab() 733 out.WriteString("\t" + node.startCode + "\n") 734 tab() 735 out.WriteString("}\n") 736 } 737 tab() 738 fmt.Fprintf(out, "OUTER%s%d:\n", node.id, lvl) 739 tab() 740 prefixReplacer.WriteString(out, 741 fmt.Sprintf("for { switch yylex.next(%v) {\n", lvl)) 742 for i, x := range node.kid { 743 tab() 744 fmt.Fprintf(out, "\tcase %d:\n", i) 745 lvl++ 746 if x.kid != nil { 747 writeFamily(out, x, lvl) 748 } else { 749 tab() 750 out.WriteString("\t" + x.code + "\n") 751 } 752 lvl-- 753 } 754 tab() 755 out.WriteString("\tdefault:\n") 756 tab() 757 fmt.Fprintf(out, "\t\t break OUTER%s%d\n", node.id, lvl) 758 tab() 759 out.WriteString("\t}\n") 760 tab() 761 out.WriteString("\tcontinue\n") 762 tab() 763 out.WriteString("}\n") 764 tab() 765 prefixReplacer.WriteString(out, "yylex.pop()\n") 766 tab() 767 out.WriteString(node.endCode + "\n") 768 } 769 770 var lexertext = `import ("bufio";"io";"strings") 771 type frame struct { 772 i int 773 s string 774 line, column int 775 } 776 type Lexer struct { 777 // The lexer runs in its own goroutine, and communicates via channel 'ch'. 778 ch chan frame 779 ch_stop chan bool 780 // We record the level of nesting because the action could return, and a 781 // subsequent call expects to pick up where it left off. In other words, 782 // we're simulating a coroutine. 783 // TODO: Support a channel-based variant that compatible with Go's yacc. 784 stack []frame 785 stale bool 786 787 // The 'l' and 'c' fields were added for 788 // https://github.com/wagerlabs/docker/blob/65694e801a7b80930961d70c69cba9f2465459be/buildfile.nex 789 // Since then, I introduced the built-in Line() and Column() functions. 790 l, c int 791 792 parseResult interface{} 793 794 // The following line makes it easy for scripts to insert fields in the 795 // generated code. 796 // [NEX_END_OF_LEXER_STRUCT] 797 } 798 799 // NewLexerWithInit creates a new Lexer object, runs the given callback on it, 800 // then returns it. 801 func NewLexerWithInit(in io.Reader, initFun func(*Lexer)) *Lexer { 802 yylex := new(Lexer) 803 if initFun != nil { 804 initFun(yylex) 805 } 806 yylex.ch = make(chan frame) 807 yylex.ch_stop = make(chan bool, 1) 808 var scan func(in *bufio.Reader, ch chan frame, ch_stop chan bool, family []dfa, line, column int) 809 scan = func(in *bufio.Reader, ch chan frame, ch_stop chan bool, family []dfa, line, column int) { 810 // Index of DFA and length of highest-precedence match so far. 811 matchi, matchn := 0, -1 812 var buf []rune 813 n := 0 814 checkAccept := func(i int, st int) bool { 815 // Higher precedence match? DFAs are run in parallel, so matchn is at most len(buf), hence we may omit the length equality check. 816 if family[i].acc[st] && (matchn < n || matchi > i) { 817 matchi, matchn = i, n 818 return true 819 } 820 return false 821 } 822 var state [][2]int 823 for i := 0; i < len(family); i++ { 824 mark := make([]bool, len(family[i].startf)) 825 // Every DFA starts at state 0. 826 st := 0 827 for { 828 state = append(state, [2]int{i, st}) 829 mark[st] = true 830 // As we're at the start of input, follow all ^ transitions and append to our list of start states. 831 st = family[i].startf[st] 832 if -1 == st || mark[st] { break } 833 // We only check for a match after at least one transition. 834 checkAccept(i, st) 835 } 836 } 837 atEOF := false 838 stopped := false 839 for { 840 if n == len(buf) && !atEOF { 841 r,_,err := in.ReadRune() 842 switch err { 843 case io.EOF: atEOF = true 844 case nil: buf = append(buf, r) 845 default: panic(err) 846 } 847 } 848 if !atEOF { 849 r := buf[n] 850 n++ 851 var nextState [][2]int 852 for _, x := range state { 853 x[1] = family[x[0]].f[x[1]](r) 854 if -1 == x[1] { continue } 855 nextState = append(nextState, x) 856 checkAccept(x[0], x[1]) 857 } 858 state = nextState 859 } else { 860 dollar: // Handle $. 861 for _, x := range state { 862 mark := make([]bool, len(family[x[0]].endf)) 863 for { 864 mark[x[1]] = true 865 x[1] = family[x[0]].endf[x[1]] 866 if -1 == x[1] || mark[x[1]] { break } 867 if checkAccept(x[0], x[1]) { 868 // Unlike before, we can break off the search. Now that we're at the end, there's no need to maintain the state of each DFA. 869 break dollar 870 } 871 } 872 } 873 state = nil 874 } 875 876 if state == nil { 877 lcUpdate := func(r rune) { 878 if r == '\n' { 879 line++ 880 column = 0 881 } else { 882 column++ 883 } 884 } 885 // All DFAs stuck. Return last match if it exists, otherwise advance by one rune and restart all DFAs. 886 if matchn == -1 { 887 if len(buf) == 0 { // This can only happen at the end of input. 888 break 889 } 890 lcUpdate(buf[0]) 891 buf = buf[1:] 892 } else { 893 text := string(buf[:matchn]) 894 buf = buf[matchn:] 895 matchn = -1 896 select { 897 case ch <- frame{matchi, text, line, column}: { 898 } 899 case stopped = <- ch_stop: { 900 } 901 } 902 if stopped { 903 break 904 } 905 if len(family[matchi].nest) > 0 { 906 scan(bufio.NewReader(strings.NewReader(text)), ch, ch_stop, family[matchi].nest, line, column) 907 } 908 if atEOF { 909 break 910 } 911 for _, r := range text { 912 lcUpdate(r) 913 } 914 } 915 n = 0 916 for i := 0; i < len(family); i++ { 917 state = append(state, [2]int{i, 0}) 918 } 919 } 920 } 921 ch <- frame{-1, "", line, column} 922 } 923 go scan(bufio.NewReader(in), yylex.ch, yylex.ch_stop, dfas, 0, 0) 924 return yylex 925 } 926 927 type dfa struct { 928 acc []bool // Accepting states. 929 f []func(rune) int // Transitions. 930 startf, endf []int // Transitions at start and end of input. 931 nest []dfa 932 } 933 934 var dfas = []dfa{` 935 936 var lexeroutro = `} 937 938 func NewLexer(in io.Reader) *Lexer { 939 return NewLexerWithInit(in, nil) 940 } 941 942 func (yyLex *Lexer) Stop() { 943 yyLex.ch_stop <- true 944 } 945 946 // Text returns the matched text. 947 func (yylex *Lexer) Text() string { 948 return yylex.stack[len(yylex.stack) - 1].s 949 } 950 951 // Line returns the current line number. 952 // The first line is 0. 953 func (yylex *Lexer) Line() int { 954 if len(yylex.stack) == 0 { 955 return 0 956 } 957 return yylex.stack[len(yylex.stack) - 1].line 958 } 959 960 // Column returns the current column number. 961 // The first column is 0. 962 func (yylex *Lexer) Column() int { 963 if len(yylex.stack) == 0 { 964 return 0 965 } 966 return yylex.stack[len(yylex.stack) - 1].column 967 } 968 969 func (yylex *Lexer) next(lvl int) int { 970 if lvl == len(yylex.stack) { 971 l, c := 0, 0 972 if lvl > 0 { 973 l, c = yylex.stack[lvl - 1].line, yylex.stack[lvl - 1].column 974 } 975 yylex.stack = append(yylex.stack, frame{0, "", l, c}) 976 } 977 if lvl == len(yylex.stack) - 1 { 978 p := &yylex.stack[lvl] 979 *p = <-yylex.ch 980 yylex.stale = false 981 } else { 982 yylex.stale = true 983 } 984 return yylex.stack[lvl].i 985 } 986 func (yylex *Lexer) pop() { 987 yylex.stack = yylex.stack[:len(yylex.stack) - 1] 988 } 989 ` 990 991 func writeLex(out *bufio.Writer, root rule) { 992 if !customError { 993 // TODO: I can't remember what this was for! 994 prefixReplacer.WriteString(out, `func (yylex Lexer) Error(e string) { 995 panic(e) 996 }`) 997 } 998 prefixReplacer.WriteString(out, ` 999 // Lex runs the lexer. Always returns 0. 1000 // When the -s option is given, this function is not generated; 1001 // instead, the NN_FUN macro runs the lexer. 1002 func (yylex *Lexer) Lex(lval *yySymType) int { 1003 `) 1004 writeFamily(out, &root, 0) 1005 out.WriteString("\treturn 0\n}\n") 1006 } 1007 func writeNNFun(out *bufio.Writer, root rule) { 1008 prefixReplacer.WriteString(out, "func(yylex *Lexer) {\n") 1009 writeFamily(out, &root, 0) 1010 out.WriteString("}") 1011 } 1012 func process(output io.Writer, input io.Reader) error { 1013 lineno := 1 1014 in := bufio.NewReader(input) 1015 out := bufio.NewWriter(output) 1016 var r rune 1017 read := func() bool { 1018 var err error 1019 r, _, err = in.ReadRune() 1020 if err == io.EOF { 1021 return true 1022 } 1023 if err != nil { 1024 panic(err) 1025 } 1026 if r == '\n' { 1027 lineno++ 1028 } 1029 return false 1030 } 1031 skipws := func() bool { 1032 for !read() { 1033 if strings.IndexRune(" \n\t\r", r) == -1 { 1034 return false 1035 } 1036 } 1037 return true 1038 } 1039 var buf []rune 1040 readCode := func() string { 1041 if '{' != r { 1042 panic(ErrExpectedLBrace) 1043 } 1044 buf = []rune{r} 1045 nesting := 1 1046 for { 1047 if read() { 1048 panic(ErrUnmatchedLBrace) 1049 } 1050 buf = append(buf, r) 1051 if '{' == r { 1052 nesting++ 1053 } else if '}' == r { 1054 nesting-- 1055 if 0 == nesting { 1056 break 1057 } 1058 } 1059 } 1060 return string(buf) 1061 } 1062 var root rule 1063 needRootRAngle := false 1064 var parse func(*rule) error 1065 parse = func(node *rule) error { 1066 for { 1067 panicIf(skipws, ErrUnexpectedEOF) 1068 if '<' == r { 1069 if node != &root || len(node.kid) > 0 { 1070 panic(ErrUnexpectedLAngle) 1071 } 1072 panicIf(skipws, ErrUnexpectedEOF) 1073 node.startCode = readCode() 1074 needRootRAngle = true 1075 continue 1076 } else if '>' == r { 1077 if node == &root { 1078 if !needRootRAngle { 1079 panic(ErrUnmatchedRAngle) 1080 } 1081 } 1082 if skipws() { 1083 return ErrUnexpectedEOF 1084 } 1085 node.endCode = readCode() 1086 return nil 1087 } 1088 delim := r 1089 panicIf(read, ErrUnexpectedEOF) 1090 var regex []rune 1091 for { 1092 if r == delim && (len(regex) == 0 || regex[len(regex)-1] != '\\') { 1093 break 1094 } 1095 if '\n' == r { 1096 return ErrUnexpectedNewline 1097 } 1098 regex = append(regex, r) 1099 panicIf(read, ErrUnexpectedEOF) 1100 } 1101 if "" == string(regex) { 1102 break 1103 } 1104 panicIf(skipws, ErrUnexpectedEOF) 1105 x := new(rule) 1106 x.id = fmt.Sprintf("%d", lineno) 1107 node.kid = append(node.kid, x) 1108 x.regex = make([]rune, len(regex)) 1109 copy(x.regex, regex) 1110 if '<' == r { 1111 panicIf(skipws, ErrUnexpectedEOF) 1112 x.startCode = readCode() 1113 parse(x) 1114 } else { 1115 x.code = readCode() 1116 } 1117 } 1118 return nil 1119 } 1120 err := parse(&root) 1121 if err != nil { 1122 return err 1123 } 1124 1125 buf = nil 1126 for done := skipws(); !done; done = read() { 1127 buf = append(buf, r) 1128 } 1129 fs := token.NewFileSet() 1130 // Append a blank line to make things easier when there are only package and 1131 // import declarations. 1132 t, err := parser.ParseFile(fs, "", string(buf)+"\n", parser.ImportsOnly) 1133 if err != nil { 1134 panic(err) 1135 } 1136 printer.Fprint(out, fs, t) 1137 1138 var file *token.File 1139 fs.Iterate(func(f *token.File) bool { 1140 file = f 1141 return true 1142 }) 1143 1144 // Skip over package and import declarations. This is why we appended a blank 1145 // line above. 1146 for m := file.LineCount(); m > 1; m-- { 1147 i := 0 1148 for '\n' != buf[i] { 1149 i++ 1150 } 1151 buf = buf[i+1:] 1152 } 1153 1154 prefixReplacer.WriteString(out, lexertext) 1155 1156 for _, kid := range root.kid { 1157 gen(out, kid) 1158 } 1159 prefixReplacer.WriteString(out, lexeroutro) 1160 if !standalone { 1161 writeLex(out, root) 1162 out.WriteString(string(buf)) 1163 out.Flush() 1164 if len(outFilename) > 0 { 1165 gofmt() 1166 } 1167 return nil 1168 } 1169 m := 0 1170 const funmac = "NN_FUN" 1171 for m < len(buf) { 1172 m++ 1173 if funmac[:m] != string(buf[:m]) { 1174 out.WriteString(string(buf[:m])) 1175 buf = buf[m:] 1176 m = 0 1177 } else if funmac == string(buf[:m]) { 1178 writeNNFun(out, root) 1179 buf = buf[m:] 1180 m = 0 1181 } 1182 } 1183 out.WriteString(string(buf)) 1184 out.Flush() 1185 if len(outFilename) > 0 { 1186 gofmt() 1187 } 1188 return nil 1189 } 1190 1191 func gofmt() { 1192 src, err := ioutil.ReadFile(outFilename) 1193 if err != nil { 1194 return 1195 } 1196 src, err = format.Source(src) 1197 if err != nil { 1198 return 1199 } 1200 ioutil.WriteFile(outFilename, src, 0666) 1201 } 1202 1203 func panicIf(f func() bool, err error) { 1204 if f() { 1205 panic(err) 1206 } 1207 } 1208 1209 func dieIf(cond bool, v ...interface{}) { 1210 if cond { 1211 log.Fatal(v...) 1212 } 1213 } 1214 1215 func dieErr(err error, s string) { 1216 if err != nil { 1217 log.Fatalf("%v: %v", s, err) 1218 } 1219 } 1220 1221 func createDotFile(filename string) *os.File { 1222 if filename == "" { 1223 return nil 1224 } 1225 suf := strings.HasSuffix(filename, ".nex") 1226 dieIf(suf, "nex: DOT filename ends with .nex:", filename) 1227 file, err := os.Create(filename) 1228 dieErr(err, "Create") 1229 return file 1230 }