github.com/jmigpin/editor@v1.6.0/util/parseutil/lrparser/contentparser.go (about)

     1  package lrparser
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"strings"
     7  
     8  	"github.com/jmigpin/editor/util/goutil"
     9  )
    10  
// ContentParser holds the data built from a rule index (vertices and states)
// together with the per-rule build-node callbacks that are run while parsing.
// It is created by newContentParser.
type ContentParser struct {
	Opt          *CpOpt               // options given to newContentParser
	vd           *VerticesData        // result of newVerticesData for the rule index
	sd           *StatesData          // result of newStatesData; sd.states[0] is the initial parse state
	buildNodeFns map[Rule]BuildNodeFn // callbacks registered with SetBuildNodeFn, keyed by rule
}
    17  
    18  func newContentParser(opt *CpOpt, ri *RuleIndex) (*ContentParser, error) {
    19  	cp := &ContentParser{Opt: opt}
    20  	cp.buildNodeFns = map[Rule]BuildNodeFn{}
    21  
    22  	vd, err := newVerticesData(ri, cp.Opt.StartRule, cp.Opt.Reverse)
    23  	if err != nil {
    24  		return nil, err
    25  	}
    26  	cp.vd = vd
    27  
    28  	sd, err := newStatesData(vd, cp.Opt.ShiftOnSRConflict)
    29  	if err != nil {
    30  		if sd != nil {
    31  			err = fmt.Errorf("%w\n%v\n%v\n%v", err, ri, vd.rFirst, sd)
    32  		}
    33  		return nil, err
    34  	}
    35  	cp.sd = sd
    36  
    37  	return cp, nil
    38  }
    39  
    40  //----------
    41  
    42  func (cp *ContentParser) Parse(src []byte, index int) (*BuildNodeData, *cpRun, error) {
    43  	fset := NewFileSetFromBytes(src)
    44  	return cp.ParseFileSet(fset, index, nil)
    45  }
    46  func (cp *ContentParser) ParseFileSet(fset *FileSet, index int, extData any) (*BuildNodeData, *cpRun, error) {
    47  	ps := NewPState(fset.Src)
    48  	ps.Pos = index
    49  	ps.Reverse = cp.Opt.Reverse
    50  	cpr := newCPRun(cp.Opt, ps)
    51  	cpr.externalData = extData
    52  	cpn, err := cp.parse3(cpr)
    53  	if err != nil {
    54  		pe := &PosError{Err: err, Pos: cpr.ps.Pos}
    55  		err = fset.Error(pe)
    56  		if cpr.opt.VerboseError {
    57  			err = fmt.Errorf("%w\n%s", err, cpr.Debug(cp))
    58  		}
    59  		return nil, cpr, err
    60  	}
    61  	d := newBuildNodeData(cpr, cpn)
    62  	return d, cpr, nil
    63  }
    64  
    65  //----------
    66  
    67  func (cp *ContentParser) parse3(cpr *cpRun) (*CPNode, error) {
    68  	// add initial state to stack
    69  	cpn0 := newCPNode(cpr.ps.Pos, cpr.ps.Pos, nil)
    70  	item0 := &cpsItem{st: cp.sd.states[0], cpn: cpn0}
    71  	cpr.stk = cpStack{item0}
    72  	cpr.logf("%v\n", cpr.stk)
    73  	// first input (action rule)
    74  	prule, err := cp.nextParseRule(cpr, item0.st)
    75  	if err != nil {
    76  		return nil, err
    77  	}
    78  	// run forever
    79  	for {
    80  		item := cpr.stk[len(cpr.stk)-1] // stack top
    81  
    82  		as := item.st.action[prule]
    83  		// TODO: deal with this error at statesdata build time?
    84  		if len(as) != 1 {
    85  			return nil, fmt.Errorf("expected one action for %v, got %v (st=%v)", prule, as, item.st.id)
    86  		}
    87  		a := as[0]
    88  
    89  		switch t := a.(type) {
    90  		case *ActionShift:
    91  			prule, err = cp.shift(cpr, t)
    92  			if err != nil {
    93  				return nil, err
    94  			}
    95  		case *ActionReduce:
    96  			if err := cp.reduce(cpr, t); err != nil {
    97  				return nil, err
    98  			}
    99  		case *ActionAccept:
   100  			// handle earlystop (nodes with errors)
   101  			if item.cpn.simulated {
   102  				return nil, cpr.earlyStop.err
   103  			}
   104  
   105  			return item.cpn, nil
   106  		default:
   107  			return nil, goutil.TodoError()
   108  		}
   109  	}
   110  }
   111  func (cp *ContentParser) shift(cpr *cpRun, t *ActionShift) (Rule, error) {
   112  	// correct simulated node position
   113  	cpn := cpr.ps.Node.(*CPNode)
   114  	if cpn.simulated {
   115  		i := cpr.stk.topEnd()
   116  		cpn.SetPos(i, i)
   117  	}
   118  
   119  	cpr.logf("shift %v\n", t.st.id)
   120  	item := &cpsItem{st: t.st, cpn: cpn}
   121  	cpr.stk = append(cpr.stk, item)
   122  	cpr.logf("%v\n", cpr.stk)
   123  
   124  	if err := cp.buildNode(cpr, cpn.rule, cpn); err != nil {
   125  		return nil, err
   126  	}
   127  
   128  	// next input
   129  	return cp.nextParseRule(cpr, t.st)
   130  }
   131  func (cp *ContentParser) reduce(cpr *cpRun, ar *ActionReduce) error {
   132  	if cpr.isLogging() { // performance
   133  		cpr.logf("reduce to %v (pop %v)\n", ar.prod.id(), ar.popN)
   134  	}
   135  
   136  	// pop n items
   137  	popPos := len(cpr.stk) - ar.popN
   138  	pops := cpr.stk[popPos:]
   139  	cpr.stk = cpr.stk[:popPos] // pop
   140  
   141  	// use current stk top to find the rule transition
   142  	item3 := cpr.stk[len(cpr.stk)-1] // top of stack
   143  	st2, ok := item3.st.gotoSt[ar.prod]
   144  	if !ok {
   145  		return fmt.Errorf("no goto for rule %v in %v ", ar.prod.id(), item3.st.id)
   146  	}
   147  	cpn, err := cp.groupPopped(cpr, ar, pops)
   148  	if err != nil {
   149  		return err
   150  	}
   151  	item4 := &cpsItem{st: st2, cpn: cpn}
   152  	cpr.stk = append(cpr.stk, item4) // push "goto" to stk
   153  	cpr.logf("%v\n", cpr.stk)
   154  
   155  	return cp.buildNode(cpr, ar.prod, cpn)
   156  }
   157  
   158  //----------
   159  
   160  func (cp *ContentParser) buildNode(cpr *cpRun, r Rule, cpn *CPNode) error {
   161  	if cpn.simulated {
   162  		return nil
   163  	}
   164  	fn, ok := cp.buildNodeFns[r]
   165  	if !ok {
   166  		return nil
   167  	}
   168  	d := newBuildNodeData(cpr, cpn)
   169  	return fn(d)
   170  }
   171  
   172  //----------
   173  
   174  func (cp *ContentParser) groupPopped(cpr *cpRun, ar *ActionReduce, pops []*cpsItem) (*CPNode, error) {
   175  	cpn := cp.groupPopped2(cpr, ar, pops)
   176  	cp.propagateSimulatedAndRecover(cpr, ar, cpn)
   177  	return cpn, nil
   178  }
   179  func (cp *ContentParser) groupPopped2(cpr *cpRun, ar *ActionReduce, pops []*cpsItem) *CPNode {
   180  	if len(pops) == 0 { // handle no pops reductions (nil rules)
   181  		i := cpr.stk.topEnd()
   182  		cpn := newCPNode(i, i, ar.prod)
   183  		return cpn
   184  	} else {
   185  		// group popped items nodes into one node
   186  		w := make([]*CPNode, 0, len(pops))
   187  		for _, item2 := range pops {
   188  			w = append(w, item2.cpn)
   189  		}
   190  		cpn := newCPNode2(w[0], w[len(w)-1], ar.prod)
   191  		isReverse := cp.Opt.Reverse && ruleProdCanReverse(ar.prod)
   192  		cpn.addChilds(isReverse, w...)
   193  		return cpn
   194  	}
   195  }
   196  func (cp *ContentParser) propagateSimulatedAndRecover(cpr *cpRun, ar *ActionReduce, cpn *CPNode) {
   197  	simulatedChilds := false
   198  	for _, cpn2 := range cpn.childs {
   199  		if cpn2.simulated {
   200  			simulatedChilds = true
   201  			break
   202  		}
   203  	}
   204  	if !simulatedChilds {
   205  		return
   206  	}
   207  
   208  	// attempt to recover simulated childs
   209  	if dr, ok := cpn.rule.(*DefRule); ok {
   210  		if dr.isPOptional {
   211  			cpn.childs = nil
   212  			cpn.SetPos(cpn.Pos(), cpn.Pos()) // clear end (as if empty)
   213  			cpr.logf("recovered: optional\n")
   214  			return
   215  		}
   216  		if dr.isPZeroOrMore {
   217  			*cpn = *cpn.childs[0]
   218  			cpr.logf("recovered: pZeroOrMore\n")
   219  			return
   220  		}
   221  		if dr.isPOneOrMore {
   222  			if !cpn.childs[0].PosEmpty() {
   223  				cpn.childs = cpn.childs[0].childs
   224  				cpr.logf("recovered: pOneOrMore\n")
   225  				return
   226  			}
   227  		}
   228  	}
   229  
   230  	// simulated
   231  	cpn.simulated = true
   232  	cpn.childs = nil
   233  	cpn.SetPos(cpn.Pos(), cpn.Pos()) // clear end (as if empty)
   234  }
   235  
   236  //----------
   237  
   238  func (cp *ContentParser) nextParseRule(cpr *cpRun, st *State) (Rule, error) {
   239  	cpr.logf("rset: %v\n", st.rsetSorted)
   240  
   241  	if cpr.earlyStop.on {
   242  		return cp.simulateParseRuleSet(cpr, st)
   243  	}
   244  
   245  	r, err := cp.parseRuleSet(cpr, st.rsetSorted)
   246  	if err == nil {
   247  		return r, nil
   248  	}
   249  
   250  	// allow input to not be fully consumed
   251  	if cp.Opt.EarlyStop {
   252  		cpr.logf("earlystop: %v\n", err)
   253  		cpr.earlyStop.on = true
   254  		cpr.earlyStop.err = &PosError{Err: err, Pos: cpr.ps.Pos}
   255  		return cp.simulateParseRuleSet(cpr, st)
   256  	}
   257  
   258  	return nil, err
   259  }
   260  
   261  //----------
   262  
   263  func (cp *ContentParser) simulateParseRuleSet(cpr *cpRun, st *State) (Rule, error) {
   264  	// rule to simulate
   265  	r := (Rule)(nil)
   266  	if st.rsetHasEndRule { // performance: faster stop (not necessary)
   267  		r = endRule
   268  	} else {
   269  		if len(st.rsetSorted) == 0 {
   270  			return nil, fmt.Errorf("empty rset to simulate")
   271  		}
   272  
   273  		// get index to try next
   274  		k := cpr.earlyStop.simStateRsetIter[st] % len(st.rsetSorted)
   275  		cpr.earlyStop.simStateRsetIter[st]++
   276  		maxIter := 20
   277  		if cpr.earlyStop.simStateRsetIter[st] >= maxIter {
   278  			return nil, fmt.Errorf("reached max simulated attempts: %v; %w", maxIter, cpr.earlyStop.err)
   279  		}
   280  
   281  		r = st.rsetSorted[k]
   282  	}
   283  
   284  	i := cpr.stk.topEnd()
   285  	cpn := newCPNode(i, i, r)
   286  	cpn.simulated = true
   287  	cpr.ps.Node = cpn
   288  	if cpr.isLogging() { // performance
   289  		cpr.logf("simulate parseruleset: %v %v\n", r.id(), PNodePosStr(cpn))
   290  	}
   291  
   292  	return r, nil
   293  }
   294  
   295  //----------
   296  
   297  // creates a cpnode in ps
   298  func (cp *ContentParser) parseRuleSet(cpr *cpRun, rset []Rule) (Rule, error) {
   299  	for _, r := range rset {
   300  		if err := cp.parseRule(cpr.ps, r); err != nil {
   301  			continue
   302  		}
   303  		if cpr.isLogging() { // performance
   304  			cpr.logf("parseruleset: %v %v\n", r.id(), PNodePosStr(cpr.ps.Node))
   305  		}
   306  		return r, nil
   307  	}
   308  	return nil, fmt.Errorf("failed to parse next: %v", rset)
   309  }
   310  
   311  func (cp *ContentParser) parseRule(ps *PState, r Rule) error {
   312  	switch t := r.(type) {
   313  	case *StringRule:
   314  		pos0 := ps.Pos
   315  		if err := t.parse(ps); err != nil {
   316  			return err
   317  		}
   318  		ps.Node = newCPNode(pos0, ps.Pos, t)
   319  	case *FuncRule:
   320  		pos0 := ps.KeepPos()
   321  		if err := t.fn(ps); err != nil {
   322  			pos0.Restore()
   323  			return err
   324  		}
   325  		ps.Node = newCPNode(pos0.Pos, ps.Pos, t)
   326  	case *SingletonRule:
   327  		switch t {
   328  		//case nilRule:	// commented: not called to be parsed
   329  		case endRule:
   330  			if !ps.M.Eof() {
   331  				return fmt.Errorf("not eof")
   332  			}
   333  			ps.Node = newCPNode(ps.Pos, ps.Pos, t)
   334  		default:
   335  			panic(goutil.TodoErrorStr(t.name))
   336  		}
   337  	default:
   338  		panic(goutil.TodoErrorType(t))
   339  	}
   340  	return nil
   341  }
   342  
   343  //----------
   344  
   345  func (cp *ContentParser) SetBuildNodeFn(name string, buildFn BuildNodeFn) error {
   346  	r, ok := cp.vd.rFirst.ri.get(name)
   347  	if !ok {
   348  		return fmt.Errorf("rule name not found: %v", name)
   349  	}
   350  	cp.buildNodeFns[r] = buildFn
   351  	return nil
   352  }
   353  
   354  //----------
   355  //----------
   356  //----------
   357  
// CpOpt holds the content parser options.
type CpOpt struct {
	StartRule         string // can be empty, will try to get it from grammar
	VerboseError      bool   // collect a log while parsing and append debug output to parse errors
	EarlyStop         bool   // when parsing the next rule fails, simulate rules (e.g. an artificial end rule) instead of failing. Allows parsing to stop successfully when no more input is recognized (although there is still input), while the rules are still able to reduce correctly.
	ShiftOnSRConflict bool   // passed to newStatesData; presumably prefers shift on shift/reduce conflicts (confirm in statesdata)
	Reverse           bool   // runs input/rules in reverse (useful to backtrack in the middle of big inputs to then parse normally)
}
   366  
   367  //----------
   368  //----------
   369  //----------
   370  
// cpRun is the state of a single parse run: options, input parse state,
// stack, early stop bookkeeping, log buffer and caller-provided data.
type cpRun struct {
	opt       *CpOpt  // options given to newCPRun
	ps        *PState // input parse state; ps.Node holds the node of the last parsed or simulated rule
	stk       cpStack // parse stack
	earlyStop struct {
		on               bool           // set once a parse failure switched the run to simulating rules
		err              error          // the parse error (with position) that triggered early stop
		simStateRsetIter map[*State]int // iterate over state rset rules to avoid repeating simulated
	}
	logBuf       bytes.Buffer // log output; only written when logging is enabled
	externalData any          // opaque data set by ParseFileSet from its extData argument
}
   383  
   384  func newCPRun(opt *CpOpt, ps *PState) *cpRun {
   385  	cpr := &cpRun{opt: opt, ps: ps}
   386  	cpr.earlyStop.simStateRsetIter = map[*State]int{}
   387  	return cpr
   388  }
// isLogging reports whether the run collects log output, which is the case
// when the VerboseError option is set. Callers also use it to skip building
// expensive log arguments.
func (cpr *cpRun) isLogging() bool {
	return cpr.opt.VerboseError
}
   392  func (cpr *cpRun) logf(f string, args ...any) {
   393  	if cpr.isLogging() {
   394  		fmt.Fprintf(&cpr.logBuf, f, args...)
   395  	}
   396  }
   397  func (cpr *cpRun) Debug(cp *ContentParser) string {
   398  	return fmt.Sprintf("%s\n%s\n%s\n%s%s",
   399  		cp.vd.rFirst.ri,
   400  		cp.vd.rFirst,
   401  		cp.vd,
   402  		cp.sd,
   403  		bytes.TrimSpace(cpr.logBuf.Bytes()),
   404  	)
   405  }
   406  
   407  //----------
   408  //----------
   409  //----------
   410  
// cpStack is the content parser stack: a slice of stack items whose last
// element is the top of the stack.
type cpStack []*cpsItem
   413  
   414  func (stk cpStack) topEnd() int {
   415  	k := len(stk) - 1
   416  	return stk[k].cpn.End()
   417  }
   418  
   419  //godebug:annotateoff
   420  func (stk cpStack) String() string {
   421  	u := []string{}
   422  	for _, item := range stk {
   423  		s := fmt.Sprintf("%v:", item.st.id)
   424  		if item.cpn != nil { // can be nil in state0
   425  			if item.cpn.rule != nil { // can be nil in state0
   426  				s += fmt.Sprintf(" %v", item.cpn.rule.id())
   427  			}
   428  			s += " " + PNodePosStr(item.cpn)
   429  			if item.cpn.simulated {
   430  				s += fmt.Sprintf(" (simulated)")
   431  			}
   432  		}
   433  		u = append(u, s)
   434  	}
   435  	return fmt.Sprintf("stk{\n\t%v\n}", strings.Join(u, "\n\t"))
   436  }
   437  
   438  //----------
   439  
// cpsItem is a content parser stack item: a state paired with the node that
// was shifted or reduced when the state was pushed.
type cpsItem struct {
	st  *State  // state of this stack entry; provides the action and goto tables
	cpn *CPNode // node of this stack entry; cpStack.String tolerates nil here
	//simulated bool // TODO: move cpn.simulated here
}
   446  
   447  //----------
   448  //----------
   449  //----------
   450  
   451  func indentStr(t string, u string) string {
   452  	u = strings.TrimRight(u, "\n")
   453  	u = t + strings.ReplaceAll(u, "\n", "\n"+t) + "\n"
   454  	return u
   455  }