github.com/jmigpin/editor@v1.6.0/util/parseutil/lrparser/statesdata.go (about)

     1  package lrparser
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  
     7  	"github.com/jmigpin/editor/util/iout"
     8  )
     9  
    10  type StatesData struct {
    11  	states            []*State
    12  	shiftOnSRConflict bool
    13  }
    14  
    15  func newStatesData(vd *VerticesData, shiftOnSRConflict bool) (*StatesData, error) {
    16  	sd := &StatesData{shiftOnSRConflict: shiftOnSRConflict}
    17  	if err := sd.build(vd); err != nil {
    18  		return nil, err
    19  	}
    20  	if err := sd.checkStringsConflicts(vd); err != nil {
    21  		return sd, err // also return sd for debug
    22  	}
    23  	if err := sd.checkActionConflicts(vd); err != nil {
    24  		return sd, err // also return sd for debug
    25  	}
    26  	return sd, nil
    27  }
    28  
    29  func (sd *StatesData) build(vd *VerticesData) error {
    30  	// map all states (using ints)
    31  	sd.states = make([]*State, len(vd.verts))
    32  	for _, v := range vd.verts {
    33  		id := stateId(v.id)
    34  		sd.states[id] = newState(id)
    35  	}
    36  
    37  	addAction := func(st *State, r Rule, a Action) {
    38  		st.action[r] = append(st.action[r], a)
    39  	}
    40  
    41  	// construct states
    42  	for _, v := range vd.verts {
    43  		st := sd.states[int(v.id)]
    44  
    45  		// action: shift
    46  		for r, v2 := range v.gotoVert {
    47  			st2 := sd.states[int(v2.id)]
    48  			if r.isTerminal() {
    49  				a := &ActionShift{st: st2}
    50  				addAction(st, r, a)
    51  			} else {
    52  				// goto transitions
    53  				st.gotoSt[r] = st2
    54  			}
    55  		}
    56  		// action: reduce
    57  		for rd, las := range v.rdslasC {
    58  			if !rd.dotAtEnd() {
    59  				continue
    60  			}
    61  			if rd.prod == startRule {
    62  				if las.has(endRule) {
    63  					addAction(st, endRule, &ActionAccept{})
    64  				}
    65  			} else {
    66  				for r2 := range las {
    67  					a := &ActionReduce{
    68  						prod: rd.prod,
    69  						popN: rd.popLen(),
    70  					}
    71  					addAction(st, r2, a)
    72  				}
    73  			}
    74  		}
    75  
    76  		// commented: done above at "action shift"
    77  		//// goto transitions
    78  		//for r, v2 := range v.gotoVert {
    79  		//	if !ruleIsTerminal(r) {
    80  		//		st.gotoSt[r] = sd.states[int(v2.id)]
    81  		//	}
    82  		//}
    83  
    84  		rset := RuleSet{}
    85  		// compute rset for parsenextrule
    86  		for rd, _ := range v.rdslasK {
    87  			if r, ok := rd.dotRule(); ok {
    88  				//st.rset.add(vd.rFirst.first(r))
    89  				rset.add(vd.rFirst.first(r))
    90  			}
    91  		}
    92  		// compute lookahead rset for parsenextrule
    93  		for rd, las := range v.rdslasC {
    94  			if rd.dotAtEnd() {
    95  				//st.rsetLa.add(las)
    96  				rset.add(las)
    97  			}
    98  		}
    99  
   100  		// remove nil rules from the rset to parse
   101  		rset.unset(nilRule)
   102  
   103  		st.rsetSorted = sortRuleSetForParse(rset)
   104  		st.rsetHasEndRule = rset.has(endRule)
   105  	}
   106  
   107  	return nil
   108  }
   109  
   110  //----------
   111  
   112  func (sd *StatesData) checkActionConflicts(vd *VerticesData) error {
   113  	me := iout.MultiError{}
   114  	for _, st := range sd.states {
   115  		for r, as := range st.action {
   116  			if len(as) <= 1 {
   117  				continue
   118  			}
   119  
   120  			// solve shift/reduct conflicts with shift (ignores reductions in this action)
   121  			if sd.shiftOnSRConflict {
   122  				shifts := []Action{}
   123  				for _, a := range as {
   124  					if u, ok := a.(*ActionShift); ok {
   125  						shifts = append(shifts, u)
   126  					}
   127  				}
   128  				// prefer shift (don't reduce)
   129  				if len(shifts) == 1 {
   130  					st.action[r] = shifts
   131  					continue
   132  				}
   133  			}
   134  
   135  			// have conflict
   136  			w := []string{}
   137  			w = append(w, fmt.Sprintf("conflict: %v, %v:", st.id, r.id()))
   138  			for _, a := range as {
   139  				w = append(w, fmt.Sprintf("%v", a))
   140  			}
   141  			//v := vd.verts[st.id]
   142  			//w = append(w, fmt.Sprintf("%v\n", v))
   143  			//w = append(w, fmt.Sprintf("%v", st))
   144  			err := fmt.Errorf("%v", strings.Join(w, "\n"))
   145  			me.Add(err)
   146  		}
   147  	}
   148  	return me.Result()
   149  }
   150  
   151  //----------
   152  
   153  func (sd *StatesData) checkStringsConflicts(vd *VerticesData) error {
   154  	// TODO: anyrune?
   155  
   156  	for _, st := range sd.states {
   157  		for i, r := range st.rsetSorted {
   158  			sr1, ok := r.(*StringRule)
   159  			if !ok {
   160  				continue
   161  			}
   162  
   163  			for k := i + 1; k < len(st.rsetSorted); k++ {
   164  				r2 := st.rsetSorted[k]
   165  				sr2, ok := r2.(*StringRule)
   166  				if !ok {
   167  					continue
   168  				}
   169  
   170  				if ok, err := sr1.intersect(sr2); err == nil && ok {
   171  					return fmt.Errorf("stringrules %v intersects with %v", sr2, sr1)
   172  				}
   173  			}
   174  		}
   175  	}
   176  	return nil
   177  }
   178  
   179  //func (sd *StatesData) checkStringsConflicts2(sr1, sr2 *StringRule) error {
   180  //	switch sr2.typ {
   181  //	case stringRTOr:
   182  //		for _, ru2 := range sr2.runes {
   183  //			if has, err := sd.srHasRune(sr1, ru2); err != nil {
   184  //				return err
   185  //			} else if has {
   186  //				return fmt.Errorf("rune %q already in %v", ru2, sr1)
   187  //			}
   188  //		}
   189  //		for _, rr2 := range sr2.ranges {
   190  //			for _, ru2 := range []rune{rr2[0], rr2[1]} {
   191  //				if has, err := sd.srHasRune(sr1, ru2); err != nil {
   192  //					return err
   193  //				} else if has {
   194  //					return fmt.Errorf("range %v already in %v", rr2, sr1)
   195  //				}
   196  //			}
   197  //		}
   198  //		//case stringRTOr:
   199  //	}
   200  //	return nil
   201  //}
   202  
   203  //func (sd *StatesData) srHasRune(sr *StringRule, ru rune) (bool, error) {
   204  //	switch sr.typ {
   205  //	case stringRTOr:
   206  //		for _, ru2 := range sr.runes {
   207  //			if ru == ru2 {
   208  //				return true, nil
   209  //			}
   210  //		}
   211  //		for _, rr := range sr.ranges {
   212  //			if rr.HasRune(ru) {
   213  //				return true, nil
   214  //			}
   215  //		}
   216  //		return false, nil
   217  //	case stringRTOrNeg:
   218  //		for _, ru2 := range sr.runes {
   219  //			if ru == ru2 {
   220  //				return false, nil
   221  //			}
   222  //		}
   223  //		for _, rr := range sr.ranges {
   224  //			if !rr.HasRune(ru) {
   225  //				return false, nil
   226  //			}
   227  //		}
   228  //		return true, nil
   229  //	}
   230  //	return false, fmt.Errorf("not orrule")
   231  //}
   232  
   233  //func (sd *StatesData) srHasRune(sr *StringRule, ru rune) (bool, error) {
   234  //	switch sr.typ {
   235  //	case stringRTOr:
   236  //		for _, ru2 := range sr.runes {
   237  //			if ru == ru2 {
   238  //				return true, nil
   239  //			}
   240  //		}
   241  //		for _, rr := range sr.ranges {
   242  //			if rr.HasRune(ru) {
   243  //				return true, nil
   244  //			}
   245  //		}
   246  //		return false, nil
   247  //	case stringRTOrNeg:
   248  //		for _, ru2 := range sr.runes {
   249  //			if ru == ru2 {
   250  //				return false, nil
   251  //			}
   252  //		}
   253  //		for _, rr := range sr.ranges {
   254  //			if !rr.HasRune(ru) {
   255  //				return false, nil
   256  //			}
   257  //		}
   258  //		return true, nil
   259  //	}
   260  //	return false, fmt.Errorf("not orrule")
   261  //}
   262  
   263  //func (sd *StatesData) runeConflict(sr *StringRule, ru rune) error {
   264  //	switch sr.typ {
   265  //	case stringRTOr:
   266  //		for _, ru2 := range sr.runes {
   267  //			if ru2 == ru {
   268  //				return fmt.Errorf("rune %q already defined at %v", ru sr)
   269  //			}
   270  //		}
   271  //		for _, rr := range sr.ranges {
   272  //			if rr.HasRune(ru) {
   273  //				return fmt.Errorf("rune %q already defined at %v", ru sr)
   274  //			}
   275  //		}
   276  //		return false, nil
   277  //	case stringRTOrNeg:
   278  //		for _, ru2 := range sr.runes {
   279  //			if ru == ru2 {
   280  //				return false, nil
   281  //			}
   282  //		}
   283  //		for _, rr := range sr.ranges {
   284  //			if !rr.HasRune(ru) {
   285  //				return false, nil
   286  //			}
   287  //		}
   288  //		return true, nil
   289  //	default:
   290  //		panic(fmt.Sprintf("bad stringrule type: %q", sr.typ))
   291  //	}
   292  //}
   293  
   294  //func (sd *StatesData) solveConflicts(vd *VerticesData) error {
   295  //	// strings conflicts (runes)
   296  //	for _, st := range sd.states {
   297  //		orM := map[rune]Rule{}
   298  //		orNegM := map[rune]Rule{}
   299  //		orRangeM := map[RuneRange]Rule{}
   300  //		orRangeNegM := map[RuneRange]Rule{}
   301  
   302  //		hasAnyrune := false
   303  //		for _, r := range st.rsetSorted {
   304  //			if r == anyruneRule {
   305  //				hasAnyrune = true
   306  //				break
   307  //			}
   308  //		}
   309  
   310  //		// check duplicates in orRules
   311  //		for _, r := range st.rsetSorted {
   312  //			sr, ok := r.(*StringRule)
   313  //			if !ok {
   314  //				continue
   315  //			}
   316  
   317  //			typ := sr.typ
   318  
   319  //			// special case: check andRule as orRule
   320  //			if typ == stringRTAnd && len(sr.runes) == 1 {
   321  //				typ = stringRTOr
   322  //			}
   323  
   324  //			switch typ {
   325  //			//case stringRTAnd: // sequence
   326  //			//case stringRTMid: // sequence
   327  //			case stringRTOr:
   328  //				if err := sd.checkRuneDups(orM, st, sr, sr.runes...); err != nil {
   329  //					return err
   330  //				}
   331  //				if err := sd.checkRangeDups(orRangeM, st, sr, sr.ranges...); err != nil {
   332  //					return err
   333  //				}
   334  //			case stringRTOrNeg:
   335  //				if err := sd.checkRuneDups(orNegM, st, sr, sr.runes...); err != nil {
   336  //					return err
   337  //				}
   338  //				if err := sd.checkRangeDups(orRangeNegM, st, sr, sr.ranges...); err != nil {
   339  //					return err
   340  //				}
   341  //			}
   342  //		}
   343  
   344  //		// check intersections: between individual runes and ranges
   345  //		if err := sd.checkRunesRangesDups(orM, orRangeM, st); err != nil {
   346  //			return err
   347  //		}
   348  //		if err := sd.checkRunesRangesDups(orNegM, orRangeNegM, st); err != nil {
   349  //			return err
   350  //		}
   351  
   352  //		// check intersections: all "or" rules must be in "negation" if it is defined (ex: (a|b|(c|a|b)!)
   353  //		if err := sd.checkRunesNegation(orM, orNegM, orRangeNegM, st); err != nil {
   354  //			return err
   355  //		}
   356  //		//if err := sd.checkRangesNegation(orM, orNegM, st); err != nil {
   357  //		//	return err
   358  //		//}
   359  
   360  //		// check conflicts: all "or" runes must be in "not"
   361  //		if len(orNegM) > 0 {
   362  //			for ru, r := range orM {
   363  //				_, ok := orNegM[ru]
   364  //				if !ok {
   365  //					// show "not" rules
   366  //					rs := &RuleSet{}
   367  //					for _, r2 := range orNegM {
   368  //						rs.set(r2)
   369  //					}
   370  
   371  //					return fmt.Errorf("%v: rune %q in %v is covered in %v", st.id, ru, r, rs)
   372  //				}
   373  //			}
   374  //		}
   375  //		if hasAnyrune {
   376  //			if len(orM) > 0 || len(orNegM) > 0 {
   377  //				return fmt.Errorf("%v: anyrune and stringrule in the same state\n%v", st.id, sd)
   378  //			}
   379  //		}
   380  //	}
   381  
   382  //}
   383  //func (sd *StatesData) checkRuneDups(m map[rune]Rule, st *State, r Rule, rs ...rune) error {
   384  //	for _, ru := range rs {
   385  //		r2, ok := m[ru]
   386  //		if ok {
   387  //			return fmt.Errorf("%v: rune %q in %v is already defined at %v", st.id, ru, r, r2)
   388  //		}
   389  //		m[ru] = r
   390  //	}
   391  //	return nil
   392  //}
   393  //func (sd *StatesData) checkRangeDups(m map[RuneRange]Rule, st *State, r Rule, h ...RuneRange) error {
   394  //	for _, rr := range h {
   395  //		for rr2, r2 := range m {
   396  //			if rr2.IntersectsRange(rr) {
   397  //				return fmt.Errorf("%v: range %q in %v is already defined at %v", st.id, rr, r, r2)
   398  //			}
   399  //		}
   400  //		m[rr] = r
   401  //	}
   402  //	return nil
   403  //}
   404  //func (sd *StatesData) checkRunesRangesDups(m1 map[Rune]Rule, m2 map[RuneRange]Rule, st *State) error {
   405  //	for ru, r1 := range m1 {
   406  //		for rr, r2 := range m2 {
   407  //			if rr.HasRune(ru) {
   408  //				return fmt.Errorf("%v: rune %q in %v is covered by range %v", st.id, ru, r1, rr)
   409  //			}
   410  //		}
   411  //		m[rr] = r
   412  //	}
   413  //	return nil
   414  //}
   415  //func (sd *StatesData) checkRunesNegation(m, neg map[Rune]Rule, negRange map[RuneRange]Rule, st *State) error {
   416  //	// all "or" runes must be in "neg"
   417  //	if len(neg) > 0 {
   418  //		for ru, r := range m {
   419  //			_, ok := neg[ru]
   420  //			if ok {
   421  //				continue
   422  //			}
   423  //			// show "not" rules
   424  //			rs := &RuleSet{}
   425  //			for _, r2 := range neg {
   426  //				rs.set(r2)
   427  //			}
   428  //			return fmt.Errorf("%v: rune %q in %v is covered in %v", st.id, ru, r, rs)
   429  //		}
   430  //	}
   431  //	return nil
   432  //}
   433  //func (sd *StatesData) checkRunesNegation2(m map[Rune]Rule, neg map[RuneRange]Rule, st *State) error {
   434  //	if len(neg) == 0 {
   435  //		return nil
   436  //	}
   437  //	// all "or" runes must be in "neg"
   438  //	for ru, r := range m {
   439  //		for rr,r2:=range neg{
   440  //			if rr.HasRune(ru)[
   441  
   442  //			}
   443  //		}
   444  //		_, ok := neg[ru]
   445  //		if ok {
   446  //			continue
   447  //		}
   448  //		// show "not" rules
   449  //		rs := &RuleSet{}
   450  //		for _, r2 := range neg {
   451  //			rs.set(r2)
   452  //		}
   453  //		return fmt.Errorf("%v: rune %q in %v is covered in %v", st.id, ru, r, rs)
   454  //	}
   455  //	return nil
   456  //}
   457  
   458  //----------
   459  
   460  //godebug:annotateoff
   461  func (sd *StatesData) String() string {
   462  	sb := &strings.Builder{}
   463  	for _, st := range sd.states {
   464  		fmt.Fprintf(sb, "%v\n", st)
   465  	}
   466  	return sb.String()
   467  }
   468  
   469  //----------
   470  //----------
   471  //----------
   472  
   473  type State struct {
   474  	id             stateId
   475  	action         map[Rule][]Action
   476  	gotoSt         map[Rule]*State
   477  	rsetSorted     []Rule // rule set to parse in this state
   478  	rsetHasEndRule bool
   479  }
   480  
   481  func newState(id stateId) *State {
   482  	st := &State{id: id}
   483  	st.action = map[Rule][]Action{}
   484  	st.gotoSt = map[Rule]*State{}
   485  	return st
   486  }
   487  
   488  func (st *State) actionRulesSorted() []Rule {
   489  	w := []Rule{}
   490  	for r := range st.action {
   491  		w = append(w, r)
   492  	}
   493  	sortRules(w)
   494  	return w
   495  }
   496  
   497  //godebug:annotateoff
   498  func (st *State) String() string {
   499  	s := fmt.Sprintf("%v:\n", st.id)
   500  
   501  	s += "\tactions:\n"
   502  	for _, r := range st.actionRulesSorted() {
   503  		a := st.action[r]
   504  		//u := fmt.Sprintf("%v(%p,%T)-> %v\n", r.id(), r, r, a)
   505  		u := fmt.Sprintf("%v -> %v\n", r.id(), a)
   506  		s += indentStr("\t\t", u)
   507  	}
   508  
   509  	s += "\tgotos:\n"
   510  	for r, st2 := range st.gotoSt {
   511  		u := fmt.Sprintf("%v -> %v\n", r.id(), st2.id)
   512  		s += indentStr("\t\t", u)
   513  	}
   514  
   515  	//s += indentStr("\t", "rset: "+st.rset.String())
   516  	//s += indentStr("\t", "la rset: "+st.rsetLa.String())
   517  	s += indentStr("\t", fmt.Sprintf("rset: %v", st.rsetSorted))
   518  	s = strings.TrimSpace(s)
   519  	return s
   520  }
   521  
   522  //----------
   523  //----------
   524  //----------
   525  
   526  type stateId int
   527  
   528  func (sid stateId) String() string {
   529  	return fmt.Sprintf("state%d", int(sid))
   530  }
   531  
   532  //----------
   533  //----------
   534  //----------
   535  
   536  type Action interface{}
   537  
   538  type ActionShift struct {
   539  	st *State
   540  }
   541  
   542  func (a *ActionShift) String() string {
   543  	return fmt.Sprintf("{shift:%v}", a.st.id)
   544  }
   545  
   546  type ActionReduce struct {
   547  	prod Rule // reduce to rule
   548  	popN int  // pop n
   549  }
   550  
   551  func (a *ActionReduce) String() string {
   552  	return fmt.Sprintf("{reduce:%v,pop=%v}", a.prod.id(), a.popN)
   553  }
   554  
   555  type ActionAccept struct {
   556  }
   557  
   558  func (a *ActionAccept) String() string {
   559  	return fmt.Sprintf("{accept}")
   560  }