github.com/couchbaselabs/nex@v0.0.0-20230419191105-421cb5932838/nex.go (about)

     1  // Substantial copy-and-paste from src/pkg/regexp.
     2  package main
     3  
     4  import (
     5  	"bufio"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"log"
    11  	"os"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  )
    16  import (
    17  	"go/format"
    18  	"go/parser"
    19  	"go/printer"
    20  	"go/token"
    21  )
    22  
// rule is one entry of the lexer specification: a regex together with the
// code attached to it and any rules nested beneath it.
type rule struct {
	regex     []rune  // Regular expression source; gen compiles it into a DFA.
	code      string  // Action code; writeFamily emits it for a rule that has no kids.
	startCode string  // Code writeFamily emits before the rule's matching loop.
	endCode   string  // Code writeFamily emits after the rule's matching loop.
	kid       []*rule // Nested rules; gen and writeFamily recurse into them.
	id        string  // Suffix for the OUTER labels and the DOT graph names.
}
    31  
// Sentinel errors. The parsing code in this file reports failures by
// panicking with one of these values.
var (
	ErrInternal            = errors.New("internal error")
	ErrUnmatchedLpar       = errors.New("unmatched '('")
	ErrUnmatchedRpar       = errors.New("unmatched ')'")
	ErrUnmatchedLbkt       = errors.New("unmatched '['")
	ErrUnmatchedRbkt       = errors.New("unmatched ']'")
	ErrBadRange            = errors.New("bad range in character class")
	ErrExtraneousBackslash = errors.New("extraneous backslash")
	ErrBareClosure         = errors.New("closure applies to nothing")
	ErrBadBackslash        = errors.New("illegal backslash escape")
	ErrExpectedLBrace      = errors.New("expected '{'")
	ErrUnmatchedLBrace     = errors.New("unmatched '{'")
	ErrUnexpectedEOF       = errors.New("unexpected EOF")
	ErrUnexpectedNewline   = errors.New("unexpected newline")
	ErrUnexpectedLAngle    = errors.New("unexpected '<'")
	ErrUnmatchedLAngle     = errors.New("unmatched '<'")
	ErrUnmatchedRAngle     = errors.New("unmatched '>'")
)
    50  
    51  func ispunct(c rune) bool {
    52  	for _, r := range "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" {
    53  		if c == r {
    54  			return true
    55  		}
    56  	}
    57  	return false
    58  }
    59  
    60  var escapes = []rune("abfnrtv")
    61  var escaped = []rune("\a\b\f\n\r\t\v")
    62  
    63  func escape(c rune) rune {
    64  	for i, b := range escapes {
    65  		if b == c {
    66  			return escaped[i]
    67  		}
    68  	}
    69  	return -1
    70  }
    71  
// Edge kinds, stored in edge.kind.
const (
	// Nil transition: nilClose follows it without consuming input.
	kNil = iota
	// Transition on the single rune held in edge.r.
	kRune
	// Transition on a character class described by edge.lim and edge.negate.
	kClass
	// Wildcard transition; the regex parser creates it for '.'.
	kWild
	// Start-of-input transition; the regex parser creates it for '^'.
	kStart
	// End-of-input transition; the regex parser creates it for '$'.
	kEnd
)
    80  
// edge is a labelled transition between two nodes of an NFA or DFA.
type edge struct {
	kind   int    // One of kNil, kRune, kClass, kWild, kStart, kEnd.
	r      rune   // Rune for rune edges.
	lim    []rune // Pairs of limits for character class edges.
	negate bool   // True if the character class is negated.
	dst    *node  // Destination node.
}
// node is a state of an NFA or DFA.
type node struct {
	e      edges // Outedges.
	n      int   // Index number. Scoped to a family. The DFA dead-end node uses -1.
	accept bool  // True if this is an accepting state.
	set    []int // The NFA nodes represented by a DFA node.
}
    94  
    95  type edges []*edge
    96  
    97  func (e edges) Len() int {
    98  	return len(e)
    99  }
   100  func (e edges) Less(i, j int) bool {
   101  	return e[i].r < e[j].r
   102  }
   103  
   104  func (e edges) Swap(i, j int) {
   105  	e[i], e[j] = e[j], e[i]
   106  }
   107  
   108  type RuneSlice []rune
   109  
   110  func (p RuneSlice) Len() int           { return len(p) }
   111  func (p RuneSlice) Less(i, j int) bool { return p[i] < p[j] }
   112  func (p RuneSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   113  
   114  // Print a graph in DOT format given the start node.
   115  //
   116  //	$ dot -Tps input.dot -o output.ps
   117  func writeDotGraph(outf *os.File, start *node, id string) {
   118  	done := make(map[*node]bool)
   119  	var show func(*node)
   120  	show = func(u *node) {
   121  		if u.accept {
   122  			fmt.Fprintf(outf, "  %v[style=filled,color=green];\n", u.n)
   123  		}
   124  		done[u] = true
   125  		for _, e := range u.e {
   126  			// We use -1 to denote the dead end node in DFAs.
   127  			if e.dst.n == -1 {
   128  				continue
   129  			}
   130  			label := ""
   131  			runeToDot := func(r rune) string {
   132  				if strconv.IsPrint(r) {
   133  					return fmt.Sprintf("%v", string(r))
   134  				}
   135  				return fmt.Sprintf("U+%X", int(r))
   136  			}
   137  			switch e.kind {
   138  			case kRune:
   139  				label = fmt.Sprintf("[label=%q]", runeToDot(e.r))
   140  			case kWild:
   141  				label = "[color=blue]"
   142  			case kClass:
   143  				label = "[label=\"["
   144  				if e.negate {
   145  					label += "^"
   146  				}
   147  				for i := 0; i < len(e.lim); i += 2 {
   148  					label += runeToDot(e.lim[i])
   149  					if e.lim[i] != e.lim[i+1] {
   150  						label += "-" + runeToDot(e.lim[i+1])
   151  					}
   152  				}
   153  				label += "]\"]"
   154  			}
   155  			fmt.Fprintf(outf, "  %v -> %v%v;\n", u.n, e.dst.n, label)
   156  		}
   157  		for _, e := range u.e {
   158  			if !done[e.dst] {
   159  				show(e.dst)
   160  			}
   161  		}
   162  	}
   163  	fmt.Fprintf(outf, "digraph %v {\n  0[shape=box];\n", id)
   164  	show(start)
   165  	fmt.Fprintln(outf, "}")
   166  }
   167  
   168  func inClass(r rune, lim []rune) bool {
   169  	for i := 0; i < len(lim); i += 2 {
   170  		if lim[i] <= r && r <= lim[i+1] {
   171  			return true
   172  		}
   173  	}
   174  	return false
   175  }
   176  
// dfadot and nfadot are optional output files. When non-nil, gen writes a
// DOT rendering of each DFA (respectively NFA) it builds to them.
var dfadot, nfadot *os.File
   178  
   179  func gen(out *bufio.Writer, x *rule) {
   180  	s := x.regex
   181  	// Regex -> NFA
   182  	// We cannot have our alphabet be all Unicode characters. Instead,
   183  	// we compute an alphabet for each regex:
   184  	//
   185  	//   1. Singles: we add single runes used in the regex: any rune not in a
   186  	//   range. These are held in `sing`.
   187  	//
   188  	//   2. Ranges: entire ranges become elements of the alphabet. If ranges in
   189  	//   the same expression overlap, we break them up into non-overlapping
   190  	//   ranges. The generated code checks singles before ranges, so there's no
   191  	//   need to break up a range if it contains a single. These are maintained
   192  	//   in sorted order in `lim`.
   193  	//
   194  	//   3. Wild: we add an element representing all other runes.
   195  	//
   196  	// e.g. the alphabet of /[0-9]*[Ee][2-5]*/ is sing: { E, e },
   197  	// lim: { [0-1], [2-5], [6-9] } and the wild element.
   198  	sing := make(map[rune]bool)
   199  	var lim []rune
   200  	var insertLimits func(l, r rune)
   201  	// Insert a new range [l-r] into `lim`, breaking it up if it overlaps, and
   202  	// discarding it if it coincides with an existing range. We keep `lim`
   203  	// sorted.
   204  	insertLimits = func(l, r rune) {
   205  		var i int
   206  		for i = 0; i < len(lim); i += 2 {
   207  			if l <= lim[i+1] {
   208  				break
   209  			}
   210  		}
   211  		if len(lim) == i || r < lim[i] {
   212  			lim = append(lim, 0, 0)
   213  			copy(lim[i+2:], lim[i:])
   214  			lim[i] = l
   215  			lim[i+1] = r
   216  			return
   217  		}
   218  		if l < lim[i] {
   219  			lim = append(lim, 0, 0)
   220  			copy(lim[i+2:], lim[i:])
   221  			lim[i+1] = lim[i] - 1
   222  			lim[i] = l
   223  			insertLimits(lim[i], r)
   224  			return
   225  		}
   226  		if l > lim[i] {
   227  			lim = append(lim, 0, 0)
   228  			copy(lim[i+2:], lim[i:])
   229  			lim[i+1] = l - 1
   230  			lim[i+2] = l
   231  			insertLimits(l, r)
   232  			return
   233  		}
   234  		// l == lim[i]
   235  		if r == lim[i+1] {
   236  			return
   237  		}
   238  		if r < lim[i+1] {
   239  			lim = append(lim, 0, 0)
   240  			copy(lim[i+2:], lim[i:])
   241  			lim[i] = l
   242  			lim[i+1] = r
   243  			lim[i+2] = r + 1
   244  			return
   245  		}
   246  		insertLimits(lim[i+1]+1, r)
   247  	}
   248  	pos := 0
   249  	n := 0
   250  	newNode := func() *node {
   251  		res := new(node)
   252  		res.n = n
   253  		n++
   254  		return res
   255  	}
   256  	newEdge := func(u, v *node) *edge {
   257  		res := new(edge)
   258  		res.dst = v
   259  		u.e = append(u.e, res)
   260  		sort.Sort(u.e)
   261  		return res
   262  	}
   263  	newStartEdge := func(u, v *node) *edge {
   264  		res := newEdge(u, v)
   265  		res.kind = kStart
   266  		return res
   267  	}
   268  	newEndEdge := func(u, v *node) *edge {
   269  		res := newEdge(u, v)
   270  		res.kind = kEnd
   271  		return res
   272  	}
   273  	newWildEdge := func(u, v *node) *edge {
   274  		res := newEdge(u, v)
   275  		res.kind = kWild
   276  		return res
   277  	}
   278  	newRuneEdge := func(u, v *node, r rune) *edge {
   279  		res := newEdge(u, v)
   280  		res.kind = kRune
   281  		res.r = r
   282  		sing[r] = true
   283  		return res
   284  	}
   285  	newNilEdge := func(u, v *node) *edge {
   286  		res := newEdge(u, v)
   287  		res.kind = kNil
   288  		return res
   289  	}
   290  	newClassEdge := func(u, v *node) *edge {
   291  		res := newEdge(u, v)
   292  		res.kind = kClass
   293  		res.lim = make([]rune, 0, 2)
   294  		return res
   295  	}
   296  	maybeEscape := func() rune {
   297  		c := s[pos]
   298  		if '\\' == c {
   299  			pos++
   300  			if len(s) == pos {
   301  				panic(ErrExtraneousBackslash)
   302  			}
   303  			c = s[pos]
   304  			switch {
   305  			case ispunct(c):
   306  			case escape(c) >= 0:
   307  				c = escape(s[pos])
   308  			default:
   309  				panic(ErrBadBackslash)
   310  			}
   311  		}
   312  		return c
   313  	}
   314  	pcharclass := func() (start, end *node) {
   315  		start, end = newNode(), newNode()
   316  		e := newClassEdge(start, end)
   317  		// Ranges consisting of a single element are a special case:
   318  		singletonRange := func(c rune) {
   319  			// 1. The edge-specific 'lim' field always expects endpoints in pairs,
   320  			// so we must give 'c' as the beginning and the end of the range.
   321  			e.lim = append(e.lim, c, c)
   322  			// 2. Instead of updating the regex-wide 'lim' interval set, we add a singleton.
   323  			sing[c] = true
   324  		}
   325  		if len(s) > pos && '^' == s[pos] {
   326  			e.negate = true
   327  			pos++
   328  		}
   329  		var left rune
   330  		leftLive := false
   331  		justSawDash := false
   332  		first := true
   333  		// Allow '-' at the beginning and end, and in ranges.
   334  		for pos < len(s) && s[pos] != ']' {
   335  			switch c := maybeEscape(); c {
   336  			case '-':
   337  				if first {
   338  					singletonRange('-')
   339  					break
   340  				}
   341  				justSawDash = true
   342  			default:
   343  				if justSawDash {
   344  					if !leftLive || left > c {
   345  						panic(ErrBadRange)
   346  					}
   347  					e.lim = append(e.lim, left, c)
   348  					if left == c {
   349  						sing[c] = true
   350  					} else {
   351  						insertLimits(left, c)
   352  					}
   353  					leftLive = false
   354  				} else {
   355  					if leftLive {
   356  						singletonRange(left)
   357  					}
   358  					left = c
   359  					leftLive = true
   360  				}
   361  				justSawDash = false
   362  			}
   363  			first = false
   364  			pos++
   365  		}
   366  		if leftLive {
   367  			singletonRange(left)
   368  		}
   369  		if justSawDash {
   370  			singletonRange('-')
   371  		}
   372  		return
   373  	}
   374  	isNested := false
   375  	var pre func() (start, end *node)
   376  	pterm := func() (start, end *node) {
   377  		if len(s) == pos || s[pos] == '|' {
   378  			end = newNode()
   379  			start = end
   380  			return
   381  		}
   382  		switch s[pos] {
   383  		case '*', '+', '?':
   384  			panic(ErrBareClosure)
   385  		case ')':
   386  			if !isNested {
   387  				panic(ErrUnmatchedRpar)
   388  			}
   389  			end = newNode()
   390  			start = end
   391  			return
   392  		case '(':
   393  			pos++
   394  			oldIsNested := isNested
   395  			isNested = true
   396  			start, end = pre()
   397  			isNested = oldIsNested
   398  			if len(s) == pos || ')' != s[pos] {
   399  				panic(ErrUnmatchedLpar)
   400  			}
   401  		case '.':
   402  			start, end = newNode(), newNode()
   403  			newWildEdge(start, end)
   404  		case '^':
   405  			start, end = newNode(), newNode()
   406  			newStartEdge(start, end)
   407  		case '$':
   408  			start, end = newNode(), newNode()
   409  			newEndEdge(start, end)
   410  		case ']':
   411  			panic(ErrUnmatchedRbkt)
   412  		case '[':
   413  			pos++
   414  			start, end = pcharclass()
   415  			if len(s) == pos || ']' != s[pos] {
   416  				panic(ErrUnmatchedLbkt)
   417  			}
   418  		default:
   419  			start, end = newNode(), newNode()
   420  			newRuneEdge(start, end, maybeEscape())
   421  		}
   422  		pos++
   423  		return
   424  	}
   425  	pclosure := func() (start, end *node) {
   426  		start, end = pterm()
   427  		if start == end {
   428  			return
   429  		}
   430  		if len(s) == pos {
   431  			return
   432  		}
   433  		switch s[pos] {
   434  		case '*':
   435  			newNilEdge(end, start)
   436  			nend := newNode()
   437  			newNilEdge(end, nend)
   438  			start, end = end, nend
   439  		case '+':
   440  			newNilEdge(end, start)
   441  			nend := newNode()
   442  			newNilEdge(end, nend)
   443  			end = nend
   444  		case '?':
   445  			newNilEdge(start, end)
   446  		default:
   447  			return
   448  		}
   449  		pos++
   450  		return
   451  	}
   452  	pcat := func() (start, end *node) {
   453  		for {
   454  			nstart, nend := pclosure()
   455  			if start == nil {
   456  				start, end = nstart, nend
   457  			} else if nstart != nend {
   458  				end.e = make([]*edge, len(nstart.e))
   459  				copy(end.e, nstart.e)
   460  				end = nend
   461  			}
   462  			if nstart == nend {
   463  				return
   464  			}
   465  		}
   466  		panic("unreachable")
   467  	}
   468  	pre = func() (start, end *node) {
   469  		start, end = pcat()
   470  		for pos < len(s) && s[pos] != ')' {
   471  			if s[pos] != '|' {
   472  				panic(ErrInternal)
   473  			}
   474  			pos++
   475  			nstart, nend := pcat()
   476  			tmp := newNode()
   477  			newNilEdge(tmp, start)
   478  			newNilEdge(tmp, nstart)
   479  			start = tmp
   480  			tmp = newNode()
   481  			newNilEdge(end, tmp)
   482  			newNilEdge(nend, tmp)
   483  			end = tmp
   484  		}
   485  		return
   486  	}
   487  	start, end := pre()
   488  	end.accept = true
   489  
   490  	// Compute shortlist of nodes (reachable nodes), as we may have discarded
   491  	// nodes left over from parsing. Also, make short[0] the start node.
   492  	short := make([]*node, 0, n)
   493  	{
   494  		var visit func(*node)
   495  		mark := make([]bool, n)
   496  		newn := make([]int, n)
   497  		visit = func(u *node) {
   498  			mark[u.n] = true
   499  			newn[u.n] = len(short)
   500  			short = append(short, u)
   501  			for _, e := range u.e {
   502  				if !mark[e.dst.n] {
   503  					visit(e.dst)
   504  				}
   505  			}
   506  		}
   507  		visit(start)
   508  		for _, v := range short {
   509  			v.n = newn[v.n]
   510  		}
   511  	}
   512  	n = len(short)
   513  
   514  	if nfadot != nil {
   515  		writeDotGraph(nfadot, start, "NFA_"+x.id)
   516  	}
   517  
   518  	// NFA -> DFA
   519  	nilClose := func(st []bool) {
   520  		mark := make([]bool, n)
   521  		var do func(int)
   522  		do = func(i int) {
   523  			v := short[i]
   524  			for _, e := range v.e {
   525  				if e.kind == kNil && !mark[e.dst.n] {
   526  					st[e.dst.n] = true
   527  					do(e.dst.n)
   528  				}
   529  			}
   530  		}
   531  		for i := 0; i < n; i++ {
   532  			if st[i] && !mark[i] {
   533  				mark[i] = true
   534  				do(i)
   535  			}
   536  		}
   537  	}
   538  	var todo []*node
   539  	tab := make(map[string]*node)
   540  	var buf []byte
   541  	dfacount := 0
   542  	{ // Construct the node of no return.
   543  		for i := 0; i < n; i++ {
   544  			buf = append(buf, '0')
   545  		}
   546  		tmp := new(node)
   547  		tmp.n = -1
   548  		tab[string(buf)] = tmp
   549  	}
   550  	newDFANode := func(st []bool) (res *node, found bool) {
   551  		buf = nil
   552  		accept := false
   553  		for i, v := range st {
   554  			if v {
   555  				buf = append(buf, '1')
   556  				accept = accept || short[i].accept
   557  			} else {
   558  				buf = append(buf, '0')
   559  			}
   560  		}
   561  		res, found = tab[string(buf)]
   562  		if !found {
   563  			res = new(node)
   564  			res.n = dfacount
   565  			res.accept = accept
   566  			dfacount++
   567  			for i, v := range st {
   568  				if v {
   569  					res.set = append(res.set, i)
   570  				}
   571  			}
   572  			tab[string(buf)] = res
   573  		}
   574  		return res, found
   575  	}
   576  
   577  	get := func(states []bool) *node {
   578  		nilClose(states)
   579  		node, old := newDFANode(states)
   580  		if !old {
   581  			todo = append(todo, node)
   582  		}
   583  		return node
   584  	}
   585  	getcb := func(v *node, cb func(*edge) bool) *node {
   586  		states := make([]bool, n)
   587  		for _, i := range v.set {
   588  			for _, e := range short[i].e {
   589  				if cb(e) {
   590  					states[e.dst.n] = true
   591  				}
   592  			}
   593  		}
   594  		return get(states)
   595  	}
   596  	states := make([]bool, n)
   597  	// The DFA start state is the state representing the nil-closure of the start
   598  	// node in the NFA. Recall it has index 0.
   599  	states[0] = true
   600  	dfastart := get(states)
   601  	for len(todo) > 0 {
   602  		v := todo[len(todo)-1]
   603  		todo = todo[0 : len(todo)-1]
   604  		// Singles.
   605  		var runes []rune
   606  		for r, _ := range sing {
   607  			runes = append(runes, r)
   608  		}
   609  		sort.Sort(RuneSlice(runes))
   610  		for _, r := range runes {
   611  			newRuneEdge(v, getcb(v, func(e *edge) bool {
   612  				return e.kind == kRune && e.r == r ||
   613  					e.kind == kWild ||
   614  					e.kind == kClass && e.negate != inClass(r, e.lim)
   615  			}), r)
   616  		}
   617  		// Character ranges.
   618  		for j := 0; j < len(lim); j += 2 {
   619  			e := newClassEdge(v, getcb(v, func(e *edge) bool {
   620  				return e.kind == kWild ||
   621  					e.kind == kClass && e.negate != inClass(lim[j], e.lim)
   622  			}))
   623  
   624  			e.lim = append(e.lim, lim[j], lim[j+1])
   625  		}
   626  		// Wild.
   627  		newWildEdge(v, getcb(v, func(e *edge) bool {
   628  			return e.kind == kWild || (e.kind == kClass && e.negate)
   629  		}))
   630  		// ^ and $.
   631  		newStartEdge(v, getcb(v, func(e *edge) bool { return e.kind == kStart }))
   632  		newEndEdge(v, getcb(v, func(e *edge) bool { return e.kind == kEnd }))
   633  	}
   634  	n = dfacount
   635  
   636  	if dfadot != nil {
   637  		writeDotGraph(dfadot, dfastart, "DFA_"+x.id)
   638  	}
   639  	// DFA -> Go
   640  	sorted := make([]*node, n)
   641  	for _, v := range tab {
   642  		if -1 != v.n {
   643  			sorted[v.n] = v
   644  		}
   645  	}
   646  
   647  	fmt.Fprintf(out, "\n// %v\n", string(x.regex))
   648  	for i, v := range sorted {
   649  		if i == 0 {
   650  			out.WriteString("{[]bool{")
   651  		} else {
   652  			out.WriteString(", ")
   653  		}
   654  		if v.accept {
   655  			out.WriteString("true")
   656  		} else {
   657  			out.WriteString("false")
   658  		}
   659  	}
   660  	out.WriteString("}, []func(rune) int{  // Transitions\n")
   661  	for _, v := range sorted {
   662  		out.WriteString("func(r rune) int {\n")
   663  		var runeCases, classCases string
   664  		var wildDest int
   665  		for _, e := range v.e {
   666  			m := e.dst.n
   667  			switch e.kind {
   668  			case kRune:
   669  				runeCases += fmt.Sprintf("\t\tcase %d: return %d\n", e.r, m)
   670  			case kClass:
   671  				classCases += fmt.Sprintf("\t\tcase %d <= r && r <= %d: return %d\n",
   672  					e.lim[0], e.lim[1], m)
   673  			case kWild:
   674  				wildDest = m
   675  			}
   676  		}
   677  		if runeCases != "" {
   678  			out.WriteString("\tswitch(r) {\n" + runeCases + "\t}\n")
   679  		}
   680  		if classCases != "" {
   681  			out.WriteString("\tswitch {\n" + classCases + "\t}\n")
   682  		}
   683  		fmt.Fprintf(out, "\treturn %v\n},\n", wildDest)
   684  	}
   685  	out.WriteString("}, []int{  /* Start-of-input transitions */ ")
   686  	for _, v := range sorted {
   687  		s := " -1,"
   688  		for _, e := range v.e {
   689  			if e.kind == kStart {
   690  				s = fmt.Sprintf(" %d,", e.dst.n)
   691  				break
   692  			}
   693  		}
   694  		out.WriteString(s)
   695  	}
   696  	out.WriteString("}, []int{  /* End-of-input transitions */ ")
   697  	for _, v := range sorted {
   698  		s := " -1,"
   699  		for _, e := range v.e {
   700  			if e.kind == kEnd {
   701  				s = fmt.Sprintf(" %d,", e.dst.n)
   702  				break
   703  			}
   704  		}
   705  		out.WriteString(s)
   706  	}
   707  	out.WriteString("},")
   708  	if len(x.kid) == 0 {
   709  		out.WriteString("nil")
   710  	} else {
   711  		out.WriteString("[]dfa{")
   712  		for _, kid := range x.kid {
   713  			gen(out, kid)
   714  		}
   715  		out.WriteString("}")
   716  	}
   717  	out.WriteString("},\n")
   718  }
   719  
   720  func writeFamily(out *bufio.Writer, node *rule, lvl int) {
   721  	tab := func() {
   722  		for i := 0; i <= lvl; i++ {
   723  			out.WriteByte('\t')
   724  		}
   725  	}
   726  	if node.startCode != "" {
   727  		tab()
   728  		prefixReplacer.WriteString(out, "if !yylex.stale {\n")
   729  		tab()
   730  		out.WriteString("\t" + node.startCode + "\n")
   731  		tab()
   732  		out.WriteString("}\n")
   733  	}
   734  	tab()
   735  	fmt.Fprintf(out, "OUTER%s%d:\n", node.id, lvl)
   736  	tab()
   737  	prefixReplacer.WriteString(out,
   738  	fmt.Sprintf("for {\nnext:=yylex.next(%v)\nlval.line = yylex.Line()+1\nlval.column = yylex.Column()+1\nswitch next {\n", lvl))
   739  	for i, x := range node.kid {
   740  		tab()
   741  		fmt.Fprintf(out, "\tcase %d:\n", i)
   742  		lvl++
   743  		if x.kid != nil {
   744  			writeFamily(out, x, lvl)
   745  		} else {
   746  			tab()
   747  			out.WriteString("\t" + x.code + "\n")
   748  		}
   749  		lvl--
   750  	}
   751  	tab()
   752  	out.WriteString("\tdefault:\n")
   753  	tab()
   754  	fmt.Fprintf(out, "\t\t break OUTER%s%d\n", node.id, lvl)
   755  	tab()
   756  	out.WriteString("\t}\n")
   757  	tab()
   758  	out.WriteString("\tcontinue\n")
   759  	tab()
   760  	out.WriteString("}\n")
   761  	tab()
   762  	prefixReplacer.WriteString(out, "yylex.pop()\n")
   763  	tab()
   764  	out.WriteString(node.endCode + "\n")
   765  }
   766  
// lexertext is Go source, held in a raw string, for the fixed leading part of
// the generated lexer's runtime: the imports, the frame and Lexer types,
// NewLexerWithInit (which starts a goroutine that runs the DFAs over the input
// and sends each match over a channel), the dfa type, and the opening of the
// `dfas` slice literal. The literal is left unterminated on purpose; its
// closing brace is the first character of lexeroutro.
var lexertext = `import ("bufio";"io";"strings")
type frame struct {
  i int
  s string
  line, column int
}

type Lexer struct {
  // The lexer runs in its own goroutine, and communicates via channel 'ch'.
  ch chan frame
  ch_stop chan bool
  // We record the level of nesting because the action could return, and a
  // subsequent call expects to pick up where it left off. In other words,
  // we're simulating a coroutine.
  // TODO: Support a channel-based variant that compatible with Go's yacc.
  stack []frame
  stale bool

  // The 'l' and 'c' fields were added for
  // https://github.com/wagerlabs/docker/blob/65694e801a7b80930961d70c69cba9f2465459be/buildfile.nex
  // Now used to record last seen line & column from the stack.
  l, c int

  parseResult interface{}

  // The following line makes it easy for scripts to insert fields in the
  // generated code.
  // [NEX_END_OF_LEXER_STRUCT]
}

// NewLexerWithInit creates a new Lexer object, runs the given callback on it,
// then returns it.
func NewLexerWithInit(in io.Reader, initFun func(*Lexer)) *Lexer {
  yylex := new(Lexer)
  if initFun != nil {
    initFun(yylex)
  }
  yylex.ch = make(chan frame)
  yylex.ch_stop = make(chan bool, 1)
  var scan func(in *bufio.Reader, ch chan frame, ch_stop chan bool, family []dfa, line, column int) 
  scan = func(in *bufio.Reader, ch chan frame, ch_stop chan bool, family []dfa, line, column int) {
    // Index of DFA and length of highest-precedence match so far.
    matchi, matchn := 0, -1
    var buf []rune
    n := 0
    checkAccept := func(i int, st int) bool {
      // Higher precedence match? DFAs are run in parallel, so matchn is at most len(buf), hence we may omit the length equality check.
      if family[i].acc[st] && (matchn < n || matchi > i) {
        matchi, matchn = i, n
        return true
      }
      return false
    }
    stateCap := len(family)
    if stateCap == 0 { stateCap = 1 }
    state := make([][2]int, 0, stateCap)
    for i := 0; i < len(family); i++ {
      mark := make([]bool, len(family[i].startf))
      // Every DFA starts at state 0.
      st := 0
      for {
        state = append(state, [2]int{i, st})
        mark[st] = true
        // As we're at the start of input, follow all ^ transitions and append to our list of start states.
        st = family[i].startf[st]
        if -1 == st || mark[st] { break }
        // We only check for a match after at least one transition.
        checkAccept(i, st)
      }
    }
    atEOF := false
    stopped := false

loop:
    for {
      if n == len(buf) && !atEOF {
        r,_,err := in.ReadRune()
        switch err {
        case io.EOF: atEOF = true
        case nil:    buf = append(buf, r)
        default:     panic(err)
        }
      }
      if !atEOF {
        r := buf[n]
        n++
        d := 0
        for _, x := range state {
          x[1] = family[x[0]].f[x[1]](r)
          if -1 == x[1] { continue }
          state[d] = x
          d++
          checkAccept(x[0], x[1])
        }
        state = state[:d]
      } else {
dollar:  // Handle $.
        for _, x := range state {
          mark := make([]bool, len(family[x[0]].endf))
          for {
            mark[x[1]] = true
            x[1] = family[x[0]].endf[x[1]]
            if -1 == x[1] || mark[x[1]] { break }
            if checkAccept(x[0], x[1]) {
              // Unlike before, we can break off the search. Now that we're at the end, there's no need to maintain the state of each DFA.
              break dollar
            }
          }
        }
        state = state[:0]
      }

      if len(state) == 0 {
        lcUpdate := func(r rune) {
          if r == '\n' {
            line++
            column = 0
          } else {
            column++
          }
        }
        // All DFAs stuck. Return last match if it exists, otherwise advance by one rune and restart all DFAs.
        if matchn == -1 {
          if len(buf) == 0 {  // This can only happen at the end of input.
            break
          }
          lcUpdate(buf[0])
          buf = buf[1:]
        } else {
          text := string(buf[:matchn])
          buf = buf[matchn:]
          matchn = -1

          select {
          case <- ch_stop:
            stopped = true
            break loop
          default:
          }
          select {
          case ch <- frame{matchi, text, line, column}:
          case  <- ch_stop:
            stopped = true
            break loop
          }
          if len(family[matchi].nest) > 0 {
            scan(bufio.NewReader(strings.NewReader(text)), ch, ch_stop, family[matchi].nest, line, column)
          }
          if atEOF {
            break
          }
          for _, r := range text {
            lcUpdate(r)
          }
        }
        n = 0
        if len(family) > cap(state) {
          state = make([][2]int, 0, len(family))
        }
        for i := 0; i < len(family); i++ {
          state = append(state, [2]int{i, 0})
        }
      }
    }
    select {
      case <- ch_stop:
        stopped = true
      default:
    }
    if !stopped {
    select {
      case ch <- frame{-1, "", line, column}:
     
      case <- ch_stop:
      }
    }
  }
  go scan(bufio.NewReader(in), yylex.ch, yylex.ch_stop, dfas, 0, 0)
  return yylex
}

type dfa struct {
  acc []bool  // Accepting states.
  f []func(rune) int  // Transitions.
  startf, endf []int  // Transitions at start and end of input.
  nest []dfa
}

var dfas = []dfa{`
   956  
// lexeroutro is Go source, held in a raw string, for the fixed trailing part
// of the generated lexer's runtime. Its leading brace closes the `dfas` slice
// literal that lexertext leaves open; the rest defines NewLexer and the Lexer
// methods Stop, Text, Line, Column, next and pop.
var lexeroutro = `}

func NewLexer(in io.Reader) *Lexer {
  return NewLexerWithInit(in, nil)
}

func (yyLex *Lexer) Stop() {
  select {
  case yyLex.ch_stop <- true:
  default:
  }
}

// Text returns the matched text.
func (yylex *Lexer) Text() string {
  return yylex.stack[len(yylex.stack) - 1].s
}

// Line returns the current line number.
// The first line is 0.
func (yylex *Lexer) Line() int {
  if len(yylex.stack) == 0 {
    return yylex.l
  }
  return yylex.stack[len(yylex.stack) - 1].line
}

// Column returns the current column number.
// The first column is 0.
func (yylex *Lexer) Column() int {
  if len(yylex.stack) == 0 {
    return yylex.c
  }
  return yylex.stack[len(yylex.stack) - 1].column
}

func (yylex *Lexer) next(lvl int) int {
  if lvl == len(yylex.stack) {
    l, c := 0, 0
    if lvl > 0 {
      l, c = yylex.stack[lvl - 1].line, yylex.stack[lvl - 1].column
    }
    yylex.stack = append(yylex.stack, frame{0, "", l, c})
  }
  if lvl == len(yylex.stack) - 1 {
    p := &yylex.stack[lvl]
    *p = <-yylex.ch
    yylex.stale = false
  } else {
    yylex.stale = true
  }
  return yylex.stack[lvl].i
}
func (yylex *Lexer) pop() {
  l := len(yylex.stack)-1
  yylex.l, yylex.c = yylex.stack[l].line, yylex.stack[l].column
  yylex.stack = yylex.stack[:l]
}
`
  1016  
  1017  func writeLex(out *bufio.Writer, root rule) {
  1018  	if !customError {
  1019  		// TODO: I can't remember what this was for!
  1020  		prefixReplacer.WriteString(out, `func (yylex Lexer) Error(e string) {
  1021    panic(e)
  1022  }`)
  1023  	}
  1024  	prefixReplacer.WriteString(out, `
  1025  // Lex runs the lexer.
  1026  // When the -s option is given, this function is not generated;
  1027  // instead, the NN_FUN macro runs the lexer.
  1028  // yySymType is expected to include the int fields, line and column.
  1029  func (yylex *Lexer) Lex(lval *yySymType) int {
  1030  `)
  1031  	writeFamily(out, &root, 0)
  1032  	out.WriteString("\treturn 0\n}\n")
  1033  }
  1034  func writeNNFun(out *bufio.Writer, root rule) {
  1035  	prefixReplacer.WriteString(out, "func(yylex *Lexer) {\n")
  1036  	writeFamily(out, &root, 0)
  1037  	out.WriteString("}")
  1038  }
  1039  func process(output io.Writer, input io.Reader) error {
  1040  	lineno := 1
  1041  	in := bufio.NewReader(input)
  1042  	out := bufio.NewWriter(output)
  1043  	var r rune
  1044  	read := func() bool {
  1045  		var err error
  1046  		r, _, err = in.ReadRune()
  1047  		if err == io.EOF {
  1048  			return true
  1049  		}
  1050  		if err != nil {
  1051  			panic(err)
  1052  		}
  1053  		if r == '\n' {
  1054  			lineno++
  1055  		}
  1056  		return false
  1057  	}
  1058  	skipws := func() bool {
  1059  		for !read() {
  1060  			if strings.IndexRune(" \n\t\r", r) == -1 {
  1061  				return false
  1062  			}
  1063  		}
  1064  		return true
  1065  	}
  1066  	var buf []rune
  1067  	readCode := func() string {
  1068  		if '{' != r {
  1069  			panic(ErrExpectedLBrace)
  1070  		}
  1071  		buf = []rune{r}
  1072  		nesting := 1
  1073  		for {
  1074  			if read() {
  1075  				panic(ErrUnmatchedLBrace)
  1076  			}
  1077  			buf = append(buf, r)
  1078  			if '{' == r {
  1079  				nesting++
  1080  			} else if '}' == r {
  1081  				nesting--
  1082  				if 0 == nesting {
  1083  					break
  1084  				}
  1085  			}
  1086  		}
  1087  		return string(buf)
  1088  	}
  1089  	var root rule
  1090  	needRootRAngle := false
  1091  	var parse func(*rule) error
  1092  	parse = func(node *rule) error {
  1093  		for {
  1094  			panicIf(skipws, ErrUnexpectedEOF)
  1095  			if '<' == r {
  1096  				if node != &root || len(node.kid) > 0 {
  1097  					panic(ErrUnexpectedLAngle)
  1098  				}
  1099  				panicIf(skipws, ErrUnexpectedEOF)
  1100  				node.startCode = readCode()
  1101  				needRootRAngle = true
  1102  				continue
  1103  			} else if '>' == r {
  1104  				if node == &root {
  1105  					if !needRootRAngle {
  1106  						panic(ErrUnmatchedRAngle)
  1107  					}
  1108  				}
  1109  				if skipws() {
  1110  					return ErrUnexpectedEOF
  1111  				}
  1112  				node.endCode = readCode()
  1113  				return nil
  1114  			}
  1115  			delim := r
  1116  			panicIf(read, ErrUnexpectedEOF)
  1117  			var regex []rune
  1118  			for {
  1119  				if r == delim && (len(regex) == 0 || regex[len(regex)-1] != '\\') {
  1120  					break
  1121  				}
  1122  				if '\n' == r {
  1123  					return ErrUnexpectedNewline
  1124  				}
  1125  				regex = append(regex, r)
  1126  				panicIf(read, ErrUnexpectedEOF)
  1127  			}
  1128  			if "" == string(regex) {
  1129  				break
  1130  			}
  1131  			panicIf(skipws, ErrUnexpectedEOF)
  1132  			x := new(rule)
  1133  			x.id = fmt.Sprintf("%d", lineno)
  1134  			node.kid = append(node.kid, x)
  1135  			x.regex = make([]rune, len(regex))
  1136  			copy(x.regex, regex)
  1137  			if '<' == r {
  1138  				panicIf(skipws, ErrUnexpectedEOF)
  1139  				x.startCode = readCode()
  1140  				parse(x)
  1141  			} else {
  1142  				x.code = readCode()
  1143  			}
  1144  		}
  1145  		return nil
  1146  	}
  1147  	err := parse(&root)
  1148  	if err != nil {
  1149  		return err
  1150  	}
  1151  
  1152  	buf = nil
  1153  	for done := skipws(); !done; done = read() {
  1154  		buf = append(buf, r)
  1155  	}
  1156  	fs := token.NewFileSet()
  1157  	// Append a blank line to make things easier when there are only package and
  1158  	// import declarations.
  1159  	t, err := parser.ParseFile(fs, "", string(buf)+"\n", parser.ImportsOnly)
  1160  	if err != nil {
  1161  		panic(err)
  1162  	}
  1163  	printer.Fprint(out, fs, t)
  1164  
  1165  	var file *token.File
  1166  	fs.Iterate(func(f *token.File) bool {
  1167  		file = f
  1168  		return true
  1169  	})
  1170  
  1171  	// Skip over package and import declarations. This is why we appended a blank
  1172  	// line above.
  1173  	for m := file.LineCount(); m > 1; m-- {
  1174  		i := 0
  1175  		for '\n' != buf[i] {
  1176  			i++
  1177  		}
  1178  		buf = buf[i+1:]
  1179  	}
  1180  
  1181  	prefixReplacer.WriteString(out, lexertext)
  1182  
  1183  	for _, kid := range root.kid {
  1184  		gen(out, kid)
  1185  	}
  1186  	prefixReplacer.WriteString(out, lexeroutro)
  1187  	if !standalone {
  1188  		writeLex(out, root)
  1189  		out.WriteString(string(buf))
  1190  		out.Flush()
  1191  		if len(outFilename) > 0 {
  1192  			gofmt()
  1193  		}
  1194  		return nil
  1195  	}
  1196  	m := 0
  1197  	const funmac = "NN_FUN"
  1198  	for m < len(buf) {
  1199  		m++
  1200  		if funmac[:m] != string(buf[:m]) {
  1201  			out.WriteString(string(buf[:m]))
  1202  			buf = buf[m:]
  1203  			m = 0
  1204  		} else if funmac == string(buf[:m]) {
  1205  			writeNNFun(out, root)
  1206  			buf = buf[m:]
  1207  			m = 0
  1208  		}
  1209  	}
  1210  	out.WriteString(string(buf))
  1211  	out.Flush()
  1212  	if len(outFilename) > 0 {
  1213  		gofmt()
  1214  	}
  1215  	return nil
  1216  }
  1217  
  1218  func gofmt() {
  1219  	src, err := ioutil.ReadFile(outFilename)
  1220  	if err != nil {
  1221  		return
  1222  	}
  1223  	src, err = format.Source(src)
  1224  	if err != nil {
  1225  		return
  1226  	}
  1227  	ioutil.WriteFile(outFilename, src, 0666)
  1228  }
  1229  
  1230  func panicIf(f func() bool, err error) {
  1231  	if f() {
  1232  		panic(err)
  1233  	}
  1234  }
  1235  
  1236  func dieIf(cond bool, v ...interface{}) {
  1237  	if cond {
  1238  		log.Fatal(v...)
  1239  	}
  1240  }
  1241  
  1242  func dieErr(err error, s string) {
  1243  	if err != nil {
  1244  		log.Fatalf("%v: %v", s, err)
  1245  	}
  1246  }
  1247  
  1248  func createDotFile(filename string) *os.File {
  1249  	if filename == "" {
  1250  		return nil
  1251  	}
  1252  	suf := strings.HasSuffix(filename, ".nex")
  1253  	dieIf(suf, "nex: DOT filename ends with .nex:", filename)
  1254  	file, err := os.Create(filename)
  1255  	dieErr(err, "Create")
  1256  	return file
  1257  }