github.com/jmigpin/editor@v1.6.0/util/parseutil/lrparser/rulederef.go (about)

     1  package lrparser
     2  
     3  import (
     4  	"fmt"
     5  	"sort"
     6  
     7  	"github.com/jmigpin/editor/util/goutil"
     8  )
     9  
    10  func dereferenceRules(ri *RuleIndex) error {
    11  	// replace refrules to avoid rule ids with "refs", and catch first errors in case a refrule does not exist
    12  	if err := replaceRefRules(ri); err != nil {
    13  		return err
    14  	}
    15  	// checks boolrule value (now), can run only after replaceRefRules
    16  	if err := replaceIfRules(ri); err != nil {
    17  		return err
    18  	}
    19  
    20  	if err := replaceRulesLevel2(ri); err != nil { // parenrules(strings), procrules
    21  		return err
    22  	}
    23  	if err := replaceParenthesisRules(ri); err != nil {
    24  		return err
    25  	}
    26  	if err := replaceDuplicateRules(ri); err != nil {
    27  		return err
    28  	}
    29  
    30  	// sanity check: rules not allowed after deref phase
    31  	return visitRuleIndexRulesAndChilds(ri, func(rref *Rule) error {
    32  		switch t := (*rref).(type) {
    33  		case *RefRule,
    34  			*ParenRule,
    35  			*IfRule,
    36  			//*BoolRule, // commented: some residual rule not used in an "if" will still be present // TODO: make a clear step of boolrules?
    37  			*ProcRule:
    38  			err := fmt.Errorf("rule type present after deref phase: %T, %v", t, t)
    39  			//return err
    40  			panic(err)
    41  		}
    42  		return nil
    43  	})
    44  
    45  	return nil
    46  }
    47  
    48  //----------
    49  
    50  func replaceRefRules(ri *RuleIndex) error {
    51  	visit := (visitRuleRefFn)(nil)
    52  	visit = wrapVisitSeen(func(rref *Rule) error {
    53  		switch t := (*rref).(type) {
    54  		case *RefRule:
    55  			// replace with rule in ruleindex
    56  			if !replaceFromMap(ri.m, t.name, rref) {
    57  				err := fmt.Errorf("rule not found: %v", t.name)
    58  				return &PosError{Err: err, Pos: t.Pos()}
    59  			}
    60  		case *ProcRule:
    61  			for k, arg := range t.args {
    62  				if r, ok := arg.(Rule); ok {
    63  					if err := visit(&r); err != nil {
    64  						return err
    65  					}
    66  					t.args[k] = r
    67  				}
    68  			}
    69  		}
    70  		return walkRuleChilds(*rref, visit)
    71  	})
    72  	return visitRuleIndexRules(ri, visit)
    73  }
    74  func replaceIfRules(ri *RuleIndex) error {
    75  	return visitRuleIndexRulesAndChilds(ri, func(rref *Rule) error {
    76  		switch t := (*rref).(type) {
    77  		case *IfRule:
    78  			c0 := t.childs_[0] // conditional rule
    79  			c1 := t.childs_[1] // rule if condition is true
    80  			c2 := t.childs_[2] // rule if condition is false
    81  			c0br, ok := c0.(*BoolRule)
    82  			if !ok {
    83  				return fmt.Errorf("ifrule condition is not a boolrule: %v (%T)", c0, c0)
    84  			}
    85  			// observe the value now
    86  			if c0br.value {
    87  				*rref = c1
    88  			} else {
    89  				*rref = c2
    90  			}
    91  		}
    92  		return nil
    93  	})
    94  }
    95  
    96  //----------
    97  
    98  func replaceRulesLevel2(ri *RuleIndex) error {
    99  	visit := (visitRuleRefFn)(nil)
   100  	visit = wrapVisitSeen(func(rref *Rule) error {
   101  		switch t := (*rref).(type) {
   102  		case *ParenRule: // only string rules
   103  			switch t.typ {
   104  			case parenRTStrOr,
   105  				parenRTStrOrNeg,
   106  				parenRTStrOrRange,
   107  				parenRTStrMid:
   108  				// visit childs before applying replacement
   109  				if err := walkRuleChilds(t, visit); err != nil {
   110  					return err
   111  				}
   112  
   113  				if sr, err := parenToStringRule(t.onlyChild(), t.typ); err != nil {
   114  					return err
   115  				} else {
   116  					*rref = sr
   117  				}
   118  			}
   119  		case *ProcRule:
   120  			fn, ok := ri.pm[t.name]
   121  			if !ok {
   122  				return nodePosErrorf(t, "call rule not found: %v", t.name)
   123  			}
   124  
   125  			// visit args (if rules) before applying replacement
   126  			for k, arg := range t.args {
   127  				if r2, ok := arg.(Rule); ok {
   128  					if err := visit(&r2); err != nil {
   129  						return err
   130  					}
   131  					//_ = k
   132  					t.args[k] = r2
   133  
   134  					//if err := walkRuleChilds(r2, visit); err != nil {
   135  					//	return err
   136  					//}
   137  				}
   138  			}
   139  
   140  			if u, err := fn(t.args); err != nil {
   141  				return nodePosErrorf(t, "%v: %w", t.name, err)
   142  			} else {
   143  				*rref = u
   144  			}
   145  		}
   146  
   147  		return walkRuleChilds(*rref, visit)
   148  	})
   149  	return visitRuleIndexRules(ri, visit)
   150  }
   151  func parenToStringRule(r Rule, ptyp parenRType) (*StringRule, error) {
   152  	sr, err := mergeStringRules(r)
   153  	if err != nil {
   154  		return nil, err
   155  	}
   156  	// accept
   157  	switch sr.typ {
   158  	case stringRTAnd:
   159  		switch ptyp {
   160  		case parenRTStrOr:
   161  			sr2 := *sr
   162  			sr2.typ = stringRTOr
   163  			return &sr2, nil
   164  		case parenRTStrOrNeg:
   165  			sr2 := *sr
   166  			sr2.typ = stringRTOrNeg
   167  			return &sr2, nil
   168  		case parenRTStrMid:
   169  			sr2 := *sr
   170  			sr2.typ = stringRTMid
   171  			return &sr2, nil
   172  		case parenRTStrOrRange:
   173  			if len(sr.runes) != 2 {
   174  				return nil, fmt.Errorf("expecting only 2 runes for range: %v", sr)
   175  			}
   176  			sr2 := &StringRule{typ: stringRTOr}
   177  			sr2.rranges = append(sr2.rranges, RuneRange{sr.runes[0], sr.runes[1]})
   178  			return sr2, nil
   179  		}
   180  	case stringRTOr:
   181  		switch ptyp {
   182  		case parenRTStrOr:
   183  			sr2 := *sr
   184  			sr2.typ = stringRTOr
   185  			return &sr2, nil
   186  		case parenRTStrOrNeg:
   187  			sr2 := *sr
   188  			sr2.typ = stringRTOrNeg
   189  			return &sr2, nil
   190  		}
   191  	}
   192  	return nil, fmt.Errorf("parenthesis to stringrule: unable to accept stringrule %v to type %q", sr, ptyp)
   193  }
   194  
   195  //----------
   196  
// replaceParenthesisRules replaces the parenthesis rules reachable from the rule index.
//
// The rule index will not have parenthesis rules after this step, as they will be transformed into defrule with the equivalent id, using and/or rules:
//   - none:       replaced by its only child
//   - optional:   defrule = (child | nil)
//   - zeroOrMore: defrule = ((defrule child) | nil)
//   - oneOrMore:  defrule = (zeroOrMore(child) child), where the inner zeroOrMore parenthesis is itself replaced when the new rule is visited
//
// Any other parenthesis type results in a "todo" error (the string types are expected to have been handled before this step, by replaceRulesLevel2).
//
// Panics if registering a new defrule in the rule index fails.
func replaceParenthesisRules(ri *RuleIndex) error {
	// NOTE: disabled alternative that named the new defrules with per-type counters instead of the parenthesis rule id
	//// parenthesis defrule name
	//lzc := 0  // loop zero counter
	//loc := 0  // loop one counter
	//optc := 0 // optional counter
	//pname := func(t parenrType) string {
	//	ts := ""
	//	switch t {
	//	case parenrOptional:
	//		ts = fmt.Sprintf("opt%d", optc)
	//		optc++
	//	case parenrZeroOrMore:
	//		ts = fmt.Sprintf("lz%d", lzc)
	//		lzc++
	//	case parenrOneOrMore:
	//		ts = fmt.Sprintf("lo%d", loc)
	//		loc++
	//	default:
	//		panic("!")
	//	}
	//	return fmt.Sprintf("%s", ts)
	//}
	//_ = pname

	// defrules created in this pass, by parenthesis rule id: parenthesis rules with the same id share the same defrule
	unique := map[string]*DefRule{}
	// newDefRule returns the defrule for the id of pr, creating it and registering it in the rule index (under that id) on first use
	newDefRule := func(pr *ParenRule) *DefRule {
		id := pr.id()
		dr, ok := unique[id]
		if ok {
			return dr
		}
		//dr = &DefRule{name: pname(pr.typ)}
		dr = &DefRule{name: id}
		unique[id] = dr
		if err := ri.set(dr.name, dr); err != nil {
			panic(err)
		}
		return dr
	}

	visit := (visitRuleRefFn)(nil)
	visit = wrapVisitChilds(func(rref *Rule) error {
		switch t := (*rref).(type) {
		case *ParenRule:
			// replace with defrule with special name
			switch t.typ {
			case parenRTNone:
				// plain grouping: the child takes the place of the parenthesis
				*rref = t.onlyChild()
				//return visit(rref) // visit the new rref itself
			case parenRTOptional:
				// dr = (child | nil)
				dr := newDefRule(t)
				r2 := t.onlyChild()
				r4 := &OrRule{}
				r4.childs_ = []Rule{r2, nilRule}
				dr.setOnlyChild(r4)
				dr.isPOptional = true
				*rref = dr
			case parenRTZeroOrMore:
				// dr = ((dr child) | nil): the defrule refers to itself to form the loop
				dr := newDefRule(t)
				r2 := t.onlyChild()
				r3 := &AndRule{}
				r3.childs_ = []Rule{dr, r2} // loop before (smaller run stack // also allows less conflicts due to left-to-right?) // order also used in node.go childloop func
				//r3.childs_ = []Rule{r2, dr} // loop after
				r4 := &OrRule{}
				r4.childs_ = []Rule{r3, nilRule}
				dr.setOnlyChild(r4)
				dr.isNoReverse = true
				dr.isPZeroOrMore = true
				*rref = dr

				// NOTE: disabled alternative, defined in terms of a oneormore parenthesis
				//// with oneormore
				//dr := newDefRule(t)
				//r2 := t.onlyChild()
				//r3 := &ParenRule{}
				//r3.typ = parenRTOneOrMore
				//r3.setOnlyChild(r2)
				//r4 := &OrRule{}
				//r4.childs_ = []Rule{r3, nilRule} // place loop before // order also used in node.go childloop func
				////r4.childs_ = []Rule{r2, r3} // place loop after // TODO: fails testlrparser21
				//dr.setOnlyChild(r4)
				//dr.isNoReverse = true
				//dr.isPOneOrMore = true
				//*rref = dr

			case parenRTOneOrMore:
				// NOTE: disabled alternative, with its own self-referencing loop
				//// own loop
				//// - has issues with early stop because there is no nil rule to recover with
				//dr := newDefRule(t)
				//r2 := t.onlyChild()
				//r3 := &AndRule{}
				//r3.childs_ = []Rule{dr, r2} // loop before (smaller run stack)
				////r3.childs_ = []Rule{r2, dr} // loop after
				//r4 := &OrRule{}
				//r4.childs_ = []Rule{r3, r2}
				//dr.setOnlyChild(r4)
				//dr.isNoReverse = true
				//dr.isPOneOrMore = true
				//*rref = dr

				// with zeroormore
				// dr = (zeroOrMore(child) child): the new zeroormore parenthesis is replaced when the new rref is visited below
				dr := newDefRule(t)
				r2 := t.onlyChild()
				r3 := &ParenRule{}
				r3.typ = parenRTZeroOrMore
				r3.setOnlyChild(r2)
				r4 := &AndRule{}
				r4.childs_ = []Rule{r3, r2} // place loop before // order also used in node.go childloop func
				//r4.childs_ = []Rule{r2, r3} // place loop after // TODO: fails testlrparser21
				dr.setOnlyChild(r4)
				dr.isNoReverse = true
				dr.isPOneOrMore = true
				*rref = dr

				// NOTE: disabled alternative, defined in terms of an optional parenthesis
				//// with optional
				//dr := newDefRule(t)
				//r2 := t.onlyChild()
				//r3 := &ParenRule{}
				//r3.typ = parenRTOptional
				//r3.setOnlyChild(dr)
				//r4 := &AndRule{}
				////r4.childs_ = []Rule{r3, r2} // place loop before // order also used in node.go childloop func
				//r4.childs_ = []Rule{r2, r3} // place loop after // TODO: fails testlrparser21
				//dr.setOnlyChild(r4)
				//dr.isNoReverse = true
				//dr.isPOneOrMore = true
				//*rref = dr
			default:
				// parenthesis type not handled by this step
				return goutil.TodoErrorStr(fmt.Sprintf("%q", t.typ))
			}

			// visit the new rref itself
			// (the replacement can be, or can contain, another parenthesis rule)
			return visit(rref)
		}
		return nil
	})
	return visitRuleIndexRules(ri, visit)
}
   335  
   336  // make rules unique
   337  // - the pos is lost since the repeated rules are replaced with the first definition
   338  // - the rule src position must not be used after this function
   339  func replaceDuplicateRules(ri *RuleIndex) error {
   340  	unique := map[string]*Rule{}
   341  	return visitRuleIndexRulesAndChilds(ri, func(rref *Rule) error {
   342  		_ = replaceFromMap(unique, (*rref).id(), rref)
   343  		return nil
   344  	})
   345  }
   346  
   347  //----------
   348  //----------
   349  //----------
   350  
   351  func visitRuleIndexRules(ri *RuleIndex, fn visitRuleRefFn) error {
   352  	// stable iteration to avoid (if used) unstable parenthesis loop names
   353  	ks := []string{}
   354  	for k := range ri.m {
   355  		ks = append(ks, k)
   356  	}
   357  	sort.Strings(ks)
   358  	for _, k := range ks {
   359  		r := ri.m[k]
   360  		if err := fn(r); err != nil {
   361  			return err
   362  		}
   363  	}
   364  	return nil
   365  }
   366  func visitRuleIndexRulesAndChilds(ri *RuleIndex, fn visitRuleRefFn) error {
   367  	visit := (visitRuleRefFn)(nil) // example on how fn could refer to visit inside
   368  	visit = wrapVisitChilds(fn)
   369  	return visitRuleIndexRules(ri, visit)
   370  }
   371  func wrapVisitChilds(fn visitRuleRefFn) visitRuleRefFn {
   372  	seen := map[Rule]bool{} // avoid loops
   373  	fn2 := (func(rref *Rule) error)(nil)
   374  	fn2 = func(rref *Rule) error {
   375  		if seen[*rref] {
   376  			return nil
   377  		}
   378  		k := *rref // keep in case it was changed inside fn()
   379  		seen[k] = true
   380  		defer func() { seen[k] = false }() // allow revisit in other branches
   381  
   382  		if err := fn(rref); err != nil {
   383  			return err
   384  		}
   385  		return walkRuleChilds(*rref, fn2)
   386  	}
   387  	return fn2
   388  }
   389  func wrapVisitSeen(fn visitRuleRefFn) visitRuleRefFn {
   390  	seen := map[Rule]bool{}
   391  	fn2 := (func(rref *Rule) error)(nil)
   392  	fn2 = func(rref *Rule) error {
   393  		if seen[*rref] {
   394  			return nil
   395  		}
   396  		k := *rref
   397  		seen[k] = true
   398  		defer func() { seen[k] = false }()
   399  		return fn(rref)
   400  	}
   401  	return fn2
   402  }
   403  
   404  //----------
   405  
// visitRuleRefFn is the callback used when visiting rules: it receives a
// pointer to the slot that holds the rule, so the rule can be replaced in
// place by assigning through the pointer. A non-nil error stops the visit.
type visitRuleRefFn func(*Rule) error
   407  
   408  //----------
   409  //----------
   410  //----------
   411  
   412  func replaceFromMap(m map[string]*Rule, id string, rref *Rule) bool {
   413  	r2, ok := m[id]
   414  	if ok {
   415  		// replace reference with the one already existent
   416  		*rref = *r2
   417  		return true
   418  	}
   419  	m[id] = rref // keep
   420  	return false // not replaced
   421  }
   422  
   423  //----------
   424  
   425  func nodePosErrorf(n PNode, f string, args ...interface{}) error {
   426  	err := fmt.Errorf(f, args...)
   427  	return &PosError{Err: err, Pos: n.Pos()}
   428  }