github.com/jmigpin/editor@v1.6.0/util/parseutil/lrparser/rule.go (about) 1 package lrparser 2 3 import ( 4 "fmt" 5 "sort" 6 "strings" 7 "unicode" 8 9 "github.com/jmigpin/editor/util/goutil" 10 ) 11 12 type Rule interface { 13 id() string 14 isTerminal() bool 15 childs() []Rule 16 iterChildRefs(fn func(index int, ref *Rule) error) error 17 String() string 18 } 19 20 //---------- 21 //---------- 22 //---------- 23 24 // common rule 25 type CmnRule struct { 26 childs_ []Rule 27 } 28 29 //---------- 30 31 func (r *CmnRule) addChilds(r2 ...Rule) { 32 r.childs_ = append(r.childs_, r2...) 33 } 34 func (r *CmnRule) onlyChild() Rule { 35 return r.childs_[0] 36 } 37 func (r *CmnRule) setOnlyChild(r2 Rule) { 38 r.childs_ = r.childs_[:0] 39 r.addChilds(r2) 40 } 41 42 //---------- 43 44 //godebug:annotateoff 45 func (r *CmnRule) iterChildRefs(fn func(index int, ref *Rule) error) error { 46 for i := 0; i < len(r.childs_); i++ { 47 if err := fn(i, &r.childs_[i]); err != nil { 48 return err 49 } 50 } 51 return nil 52 } 53 func (r *CmnRule) childs() []Rule { 54 return r.childs_ 55 } 56 57 //---------- 58 //---------- 59 //---------- 60 61 // definition rule 62 // (1 child) 63 type DefRule struct { 64 BasicPNode 65 CmnRule 66 name string 67 isStart bool // has "start" symbol in the grammar 68 isNoPrint bool // don't print in rule index (useful for tests) 69 70 // specially handled cases 71 isNoReverse bool // don't reverse child sequence in reverse mode 72 isPOptional bool // parenthesis: optional 73 isPZeroOrMore bool // parenthesis: zeroormore 74 isPOneOrMore bool // parenthesis: oneormore 75 } 76 77 func (r *DefRule) isTerminal() bool { 78 return false 79 } 80 func (r *DefRule) id() string { 81 s := "" 82 if r.isStart { 83 s += defRuleStartSym 84 } 85 return fmt.Sprintf("%v%v", s, r.name) 86 } 87 func (r *DefRule) String() string { 88 return fmt.Sprintf("%v = %v", r.id(), r.onlyChild().id()) 89 } 90 91 var defRuleStartSym = "^" // used in grammar 92 var defRuleNoPrintSym = "ยง" // used in grammar 93 94 //---------- 95 96 // reference to a rule // replaced in dereference phase 97 // (0 childs) 98 type RefRule struct { 99 BasicPNode 100 CmnRule 101 name string 102 } 103 104 func (r *RefRule) isTerminal() bool { 105 return false 106 } 107 func (r *RefRule) id() string { 108 return fmt.Sprintf("{r:%v}", r.name) 109 } 110 func (r *RefRule) String() string { 111 return r.id() 112 } 113 114 //---------- 115 116 // (n childs as a sequence, not productions) 117 type AndRule struct { 118 BasicPNode 119 CmnRule 120 } 121 122 func (r *AndRule) isTerminal() bool { 123 return false 124 } 125 func (r *AndRule) id() string { 126 w := []string{} 127 for _, r := range r.childs_ { 128 w = append(w, r.id()) 129 } 130 u := strings.Join(w, " ") 131 return fmt.Sprintf("[%v]", u) 132 } 133 func (r *AndRule) String() string { 134 return r.id() 135 } 136 137 //---------- 138 139 // (n childs) 140 type OrRule struct { 141 BasicPNode 142 CmnRule 143 } 144 145 func (r *OrRule) isTerminal() bool { 146 return false 147 } 148 func (r *OrRule) id() string { 149 w := []string{} 150 for _, r := range r.childs_ { 151 w = append(w, r.id()) 152 } 153 u := strings.Join(w, "|") 154 return fmt.Sprintf("[%v]", u) 155 } 156 func (r *OrRule) String() string { 157 return r.id() 158 } 159 160 //---------- 161 162 // replaced in dereference phase 163 // (3 childs: [conditional,then,else]) 164 type IfRule struct { 165 BasicPNode 166 CmnRule 167 } 168 169 func (r *IfRule) selfSequence() []Rule { return []Rule{r} } 170 func (r *IfRule) isTerminal() bool { return false } 171 func (r *IfRule) id() string { 172 return fmt.Sprintf("{if %v ? %v : %v}", r.childs_[0], r.childs_[1], r.childs_[2]) 173 } 174 func (r *IfRule) String() string { 175 return r.id() 176 } 177 178 //---------- 179 180 // To be used in src code and then found by IfRule; the value is observed when building the contentparser, not at parse time 181 // (0 childs) 182 type BoolRule struct { 183 CmnRule 184 name string 185 value bool 186 } 187 188 func (r *BoolRule) isTerminal() bool { 189 return true 190 } 191 func (r *BoolRule) id() string { 192 return fmt.Sprintf("{b:%v:%v}", r.name, r.value) 193 } 194 func (r *BoolRule) String() string { 195 return r.id() 196 } 197 198 //---------- 199 200 // parenthesis, ex: (aaa (bbb|ccc)) 201 // replaced by defrules at ruleindex 202 // (1 childs) 203 type ParenRule struct { 204 BasicPNode 205 CmnRule 206 typ parenRType 207 } 208 209 func (r *ParenRule) isTerminal() bool { 210 return false 211 } 212 213 func (r *ParenRule) id() string { 214 s := "" 215 if r.typ != parenRTNone { 216 s = string(r.typ) 217 } 218 return fmt.Sprintf("(%v)%v", r.onlyChild().id(), s) 219 } 220 func (r *ParenRule) String() string { 221 return r.id() 222 } 223 224 //---------- 225 226 // (0 childs) 227 type StringRule struct { 228 BasicPNode 229 CmnRule 230 runes []rune 231 rranges RuneRanges 232 typ stringRType 233 } 234 235 func (r *StringRule) isTerminal() bool { 236 return true 237 } 238 func (r *StringRule) id() string { 239 s := "" 240 if len(r.runes) > 0 { 241 s += fmt.Sprintf("%q", string(r.runes)) 242 } 243 if len(r.rranges) > 0 { 244 u := []string{} 245 if len(s) > 0 { 246 u = append(u, s) 247 } 248 for _, rr := range r.rranges { 249 u = append(u, fmt.Sprintf("%v", rr)) 250 } 251 s = strings.Join(u, ",") 252 return fmt.Sprintf("{%v,%v}", s, r.typ) 253 } 254 return fmt.Sprintf("%v%v", s, r.typ) 255 } 256 func (r *StringRule) String() string { 257 return r.id() 258 } 259 260 //---------- 261 262 func (sr1 *StringRule) intersect(sr2 *StringRule) (bool, error) { 263 switch sr1.typ { 264 case stringRTOr, stringRTOrNeg: 265 default: 266 return false, fmt.Errorf("expecting or/orneg: %T", sr1.typ) 267 } 268 switch sr2.typ { 269 case stringRTOr, stringRTOrNeg: 270 default: 271 return false, fmt.Errorf("expecting or/orneg: %T", sr2.typ) 272 } 273 if sr1.typ == sr2.typ { // same polarity 274 if sr1.intersectRunes(sr2.runes) { 275 return true, nil 276 } 277 if sr1.intersectRanges(sr2.rranges) { 278 return true, nil 279 } 280 } else { 281 if !sr1.intersectRunes(sr2.runes) && !sr1.intersectRanges(sr2.rranges) { 282 return true, nil 283 } 284 } 285 return false, nil 286 } 287 func (r *StringRule) intersectRunes(rus []rune) bool { 288 for _, ru2 := range rus { 289 for _, ru := range r.runes { 290 if ru == ru2 { 291 return true 292 } 293 } 294 for _, rr := range r.rranges { 295 if rr.HasRune(ru2) { 296 return true 297 } 298 } 299 } 300 return false 301 } 302 func (r *StringRule) intersectRanges(rrs []RuneRange) bool { 303 for _, rr2 := range rrs { 304 for _, ru := range r.runes { 305 if rr2.HasRune(ru) { 306 return true 307 } 308 } 309 for _, rr := range r.rranges { 310 if rr.IntersectsRange(rr2) { 311 return true 312 } 313 } 314 } 315 return false 316 } 317 318 //---------- 319 320 func (r *StringRule) parse(ps *PState) error { 321 return r.parse2(ps, r.typ) 322 } 323 func (r *StringRule) parse2(ps *PState, typ stringRType) error { 324 switch typ { 325 case stringRTAnd: // sequence, ex: keyword 326 return ps.M.RuneSequence(r.runes) 327 case stringRTMid: // sequence, ex: keyword 328 return ps.M.RuneSequenceMid(r.runes) 329 case stringRTOr: 330 return ps.M.RunesAndRuneRanges(r.runes, r.rranges) 331 case stringRTOrNeg: 332 return ps.M.RunesAndRuneRangesNot(r.runes, r.rranges) 333 default: 334 panic(goutil.TodoErrorStr(string(r.typ))) 335 } 336 } 337 338 //---------- 339 340 // processor function call rule: allows processing rules at compile time. Ex: string operations. 341 // (0 childs) 342 type ProcRule struct { 343 BasicPNode 344 CmnRule 345 name string 346 args []ProcRuleArg // allows more then just rules (ex: ints) 347 } 348 349 func (r *ProcRule) isTerminal() bool { 350 return true 351 } 352 func (r *ProcRule) id() string { 353 return fmt.Sprintf("%v(%v)", r.name, r.childs()) 354 } 355 func (r *ProcRule) String() string { 356 return r.id() 357 } 358 359 //---------- 360 361 // (0 childs) 362 type FuncRule struct { 363 CmnRule 364 name string 365 parseOrder int // value for sorting parse order, zero for func default, check 366 fn PStateParseFn 367 } 368 369 func (r *FuncRule) isTerminal() bool { 370 return true 371 } 372 func (r *FuncRule) id() string { 373 sv := "" 374 if r.parseOrder != 0 { 375 sv = fmt.Sprintf("<%v>", r.parseOrder) 376 } 377 return fmt.Sprintf("%v%v", r.name, sv) 378 } 379 func (r *FuncRule) String() string { 380 return r.id() 381 } 382 383 //---------- 384 385 // (0 childs) 386 type SingletonRule struct { 387 BasicPNode 388 CmnRule 389 name string 390 isTerm bool 391 } 392 393 func newSingletonRule(name string, isTerm bool) *SingletonRule { 394 return &SingletonRule{name: name, isTerm: isTerm} 395 } 396 func (r *SingletonRule) isTerminal() bool { 397 return r.isTerm 398 } 399 func (r *SingletonRule) id() string { return r.name } 400 func (r *SingletonRule) String() string { return r.id() } 401 402 // setup to be available in the grammars at ruleindex.go 403 var endRule = newSingletonRule("$", true) 404 var nilRule = newSingletonRule("nil", true) 405 406 // special start rule to know start/end (not a terminal) 407 var startRule = newSingletonRule("^^^", false) 408 409 //---------- 410 //---------- 411 //---------- 412 413 // parenthesis rule type 414 type parenRType rune 415 416 const ( 417 parenRTNone parenRType = 0 418 parenRTOptional parenRType = '?' 419 parenRTZeroOrMore parenRType = '*' 420 parenRTOneOrMore parenRType = '+' 421 422 // strings related 423 parenRTStrOr parenRType = '%' // individual runes 424 parenRTStrOrNeg parenRType = '!' // individual runes: not 425 parenRTStrOrRange parenRType = '-' // individual runes: range 426 parenRTStrMid parenRType = '~' // sequence: middle match 427 ) 428 429 //---------- 430 431 // string rule type 432 type stringRType byte 433 434 const ( 435 stringRTAnd stringRType = iota 436 stringRTOr 437 stringRTOrNeg 438 stringRTMid 439 ) 440 441 func (srt stringRType) String() string { 442 switch srt { 443 case stringRTAnd: 444 return "" // empty 445 case stringRTOr: 446 return string(parenRTStrOr) 447 case stringRTOrNeg: 448 return string(parenRTStrOrNeg) 449 case stringRTMid: 450 return string(parenRTStrMid) 451 default: 452 panic(srt) 453 } 454 } 455 456 // ---------- 457 458 type ProcRuleFn func(args ProcRuleArgs) (Rule, error) 459 type ProcRuleArg any 460 type ProcRuleArgs []ProcRuleArg 461 462 func (args ProcRuleArgs) Int(i int) (int, error) { 463 if i >= len(args) { 464 return 0, fmt.Errorf("missing arg %v", i) 465 } 466 arg := args[i] 467 u, ok := arg.(int) 468 if !ok { 469 return 0, fmt.Errorf("arg %v is not an int (%T)", i, arg) 470 } 471 return u, nil 472 } 473 func (args ProcRuleArgs) Rule(i int) (Rule, error) { 474 if i >= len(args) { 475 return nil, fmt.Errorf("missing arg %v", i) 476 } 477 arg := args[i] 478 u, ok := arg.(Rule) 479 if !ok { 480 return nil, fmt.Errorf("arg %v is not a rule (%T)", i, arg) 481 } 482 return u, nil 483 } 484 func (args ProcRuleArgs) MergedStringRule(i int) (*StringRule, error) { 485 r, err := args.Rule(i) 486 if err != nil { 487 return nil, err 488 } 489 sr, err := mergeStringRules(r) 490 if err != nil { 491 return nil, fmt.Errorf("arg %v: %w", i, err) 492 } 493 return sr, nil 494 } 495 496 //---------- 497 //---------- 498 //---------- 499 500 type RuleSet map[Rule]struct{} 501 502 func (rs RuleSet) set(r Rule) { 503 rs[r] = struct{}{} 504 } 505 func (rs RuleSet) unset(r Rule) { 506 delete(rs, r) 507 } 508 func (rs RuleSet) has(r Rule) bool { 509 _, ok := rs[r] 510 return ok 511 } 512 func (rs RuleSet) add(rs2 RuleSet) { 513 for r := range rs2 { 514 rs.set(r) 515 } 516 } 517 func (rs RuleSet) remove(rs2 RuleSet) { 518 for r := range rs2 { 519 rs.unset(r) 520 } 521 } 522 func (rs RuleSet) toSlice() []Rule { 523 w := []Rule{} 524 for r := range rs { 525 w = append(w, r) 526 } 527 return w 528 } 529 func (rs RuleSet) sorted() []Rule { 530 w := rs.toSlice() 531 sortRules(w) 532 return w 533 } 534 func (rs RuleSet) String() string { 535 u := []string{} 536 w := rs.sorted() 537 for _, r := range w { 538 u = append(u, fmt.Sprintf("%v", r)) 539 } 540 return fmt.Sprintf("[%v]", strings.Join(u, ",")) 541 } 542 543 //---------- 544 545 func sortRuleSetForParse(rset RuleSet) []Rule { 546 // integer/string for sorting 547 svalues := func(r Rule) (int, string) { 548 switch t := r.(type) { 549 case *FuncRule: 550 sv := 100 // allows grammars to use (1,2,...) value without thinking about the funcs default value 551 if t.parseOrder != 0 { 552 sv = t.parseOrder 553 } 554 return sv, t.name 555 case *StringRule: 556 switch t.typ { 557 case stringRTAnd: // ex: keywords 558 return 201, string(t.runes) 559 case stringRTMid: // ex: keywords 560 return 202, string(t.runes) 561 case stringRTOr: // individual runes 562 return 203, string(t.runes) 563 case stringRTOrNeg: // individual runes 564 return 204, string(t.runes) 565 default: 566 panic(goutil.TodoErrorStr(string(t.typ))) 567 } 568 case *SingletonRule: 569 switch t { 570 case endRule: 571 return 301, "" 572 } 573 panic(goutil.TodoErrorStr(t.name)) 574 } 575 panic(goutil.TodoErrorType(r)) 576 } 577 578 x := rset.toSlice() 579 sort.Slice(x, func(a, b int) bool { 580 ra, rb := x[a], x[b] 581 ta, sa := svalues(ra) 582 tb, sb := svalues(rb) 583 if ta == tb { 584 return sa < sb 585 } 586 return ta < tb 587 }) 588 return x 589 } 590 591 //---------- 592 //---------- 593 //---------- 594 595 func sortRules(w []Rule) { 596 sort.Slice(w, func(a, b int) bool { 597 ra, rb := w[a], w[b] 598 ta, sa := sortRulesValue(ra) 599 tb, sb := sortRulesValue(rb) 600 if ta == tb { 601 return sa < sb 602 } 603 return ta < tb 604 }) 605 } 606 func sortRulesValue(r Rule) (int, string) { 607 id := r.id() 608 // terminals (last) 609 if r.isTerminal() { 610 return 5, id 611 } 612 // productions: start rule (special) 613 if r == startRule { 614 return 1, id 615 } 616 // productions: starting rule (grammar) 617 if dr, ok := r.(*DefRule); ok && dr.isStart { 618 return 2, id 619 } 620 // productions: name starts with a letter (as opposed to ex: "(") 621 u := []rune(id) 622 if unicode.IsLetter(u[0]) { 623 return 3, id 624 } 625 // productions 626 return 4, id 627 } 628 629 //---------- 630 //---------- 631 //---------- 632 633 //godebug:annotateoff 634 func ruleProductions(r Rule) []Rule { 635 switch t := r.(type) { 636 case *AndRule: // andrule childs are not productions 637 return []Rule{t} 638 case *DefRule: 639 switch t2 := t.onlyChild().(type) { 640 case *OrRule: 641 return t2.childs() 642 } 643 } 644 return r.childs() 645 } 646 647 //godebug:annotateoff 648 func ruleSequence(r Rule, reverse bool) []Rule { 649 switch t := r.(type) { 650 case *AndRule: // andrule is the only rule whose childs provide a sequence 651 if reverse { 652 // use a copy to avoid changing the original rule that could be used for other grammars that are non-reverse 653 return reverseRulesCopy(t.childs()) 654 } 655 return t.childs() 656 default: 657 return []Rule{t} 658 } 659 } 660 func ruleProdCanReverse(r Rule) bool { 661 if dr, ok := r.(*DefRule); ok { 662 return !dr.isNoReverse 663 } 664 return true 665 } 666 667 //func ruleIsLoop(r Rule) bool { 668 // dr, ok := r.(*DefRule) 669 // return ok && dr.isLoop 670 //} 671 //func ruleCanBeNil(r0 Rule) bool { 672 // seen := map[Rule]bool{} 673 // vis := (func(r Rule) bool)(nil) 674 // vis = func(r Rule) bool { 675 // if seen[r] { 676 // return false 677 // } 678 // seen[r] = true 679 // defer func() { seen[r] = false }() 680 681 // if r == nilRule { 682 // return true 683 // } 684 // switch t := r.(type) { 685 // case *DefRule: 686 // return vis(t.onlyChild()) 687 // case *OrRule: 688 // for _, r2 := range t.childs2 { 689 // if vis(r2) { 690 // return true 691 // } 692 // } 693 // case *AndRule: 694 // for _, r2 := range t.childs2 { 695 // if !seen[r2] && !vis(r2) { 696 // return false 697 // } 698 // } 699 // return true 700 // } 701 // return false 702 // } 703 // return vis(r0) 704 //} 705 706 //---------- 707 708 func mergeStringRules(r Rule) (*StringRule, error) { 709 switch t := r.(type) { 710 case *StringRule: 711 return t, nil 712 case *DefRule: 713 sr, err := mergeStringRules(t.onlyChild()) 714 if err != nil { 715 // improve error 716 err = fmt.Errorf("%v: %w", t.name, err) 717 } 718 return sr, err 719 case *OrRule: 720 // concat "or" rules 721 sr2 := &StringRule{typ: stringRTOr} 722 for _, c := range t.childs() { 723 if sr3, err := mergeStringRules(c); err != nil { 724 return nil, err 725 } else { 726 switch sr3.typ { 727 case stringRTOr: 728 sr2.runes = append(sr2.runes, sr3.runes...) 729 sr2.rranges = append(sr2.rranges, sr3.rranges...) 730 default: 731 return nil, fmt.Errorf("unable to merge %v from %v into orrule", sr3, t) 732 } 733 } 734 } 735 return sr2, nil 736 case *AndRule: 737 // concat "and" rules 738 sr2 := &StringRule{typ: stringRTAnd} 739 for _, c := range t.childs() { 740 if sr3, err := mergeStringRules(c); err != nil { 741 return nil, err 742 } else { 743 switch sr3.typ { 744 case stringRTAnd: 745 sr2.runes = append(sr2.runes, sr3.runes...) 746 sr2.rranges = append(sr2.rranges, sr3.rranges...) 747 default: 748 return nil, fmt.Errorf("unable to merge %v from %v into andrule", sr3, r) 749 } 750 } 751 } 752 return sr2, nil 753 default: 754 return nil, fmt.Errorf("unable to merge to stringrule: %T, %v", r, r) 755 } 756 } 757 758 //---------- 759 760 func reverseRulesCopy(w []Rule) []Rule { 761 u := make([]Rule, len(w)) 762 copy(u, w) 763 reverseRules(u) 764 return u 765 } 766 func reverseRules(w []Rule) { 767 l := len(w) 768 for i := 0; i < l/2; i++ { 769 k := l - 1 - i 770 w[i], w[k] = w[k], w[i] 771 } 772 } 773 774 //---------- 775 776 func walkRuleChilds(rule Rule, fn func(*Rule) error) error { 777 return rule.iterChildRefs(func(index int, ref *Rule) error { 778 return fn(ref) 779 }) 780 } 781 782 //---------- 783 //---------- 784 //---------- 785 786 // TODO: rename 787 type PStateParseFn func(ps *PState) error 788 789 //---------- 790 //---------- 791 //---------- 792 793 //type RuleProductions []RuleSequence 794 795 //func (rp RuleProductions) String() string { 796 // w := []string{} 797 // for _, rs := range rp { 798 // w = append(w, rs.String()) 799 // } 800 // u := strings.Join(w, " | ") 801 // return fmt.Sprintf("[%v]", u) 802 //} 803 804 ////---------- 805 806 //type RuleSequence []Rule 807 808 //func (rs RuleSequence) String() string { 809 // w := []string{} 810 // for _, r := range rs { 811 // w = append(w, r.String()) 812 // } 813 // u := strings.Join(w, " ") 814 // return fmt.Sprintf("[%v]", u) 815 //}