github.com/jmigpin/editor@v1.6.0/util/parseutil/lrparser/contentparser.go (about) 1 package lrparser 2 3 import ( 4 "bytes" 5 "fmt" 6 "strings" 7 8 "github.com/jmigpin/editor/util/goutil" 9 ) 10 11 type ContentParser struct { 12 Opt *CpOpt 13 vd *VerticesData 14 sd *StatesData 15 buildNodeFns map[Rule]BuildNodeFn 16 } 17 18 func newContentParser(opt *CpOpt, ri *RuleIndex) (*ContentParser, error) { 19 cp := &ContentParser{Opt: opt} 20 cp.buildNodeFns = map[Rule]BuildNodeFn{} 21 22 vd, err := newVerticesData(ri, cp.Opt.StartRule, cp.Opt.Reverse) 23 if err != nil { 24 return nil, err 25 } 26 cp.vd = vd 27 28 sd, err := newStatesData(vd, cp.Opt.ShiftOnSRConflict) 29 if err != nil { 30 if sd != nil { 31 err = fmt.Errorf("%w\n%v\n%v\n%v", err, ri, vd.rFirst, sd) 32 } 33 return nil, err 34 } 35 cp.sd = sd 36 37 return cp, nil 38 } 39 40 //---------- 41 42 func (cp *ContentParser) Parse(src []byte, index int) (*BuildNodeData, *cpRun, error) { 43 fset := NewFileSetFromBytes(src) 44 return cp.ParseFileSet(fset, index, nil) 45 } 46 func (cp *ContentParser) ParseFileSet(fset *FileSet, index int, extData any) (*BuildNodeData, *cpRun, error) { 47 ps := NewPState(fset.Src) 48 ps.Pos = index 49 ps.Reverse = cp.Opt.Reverse 50 cpr := newCPRun(cp.Opt, ps) 51 cpr.externalData = extData 52 cpn, err := cp.parse3(cpr) 53 if err != nil { 54 pe := &PosError{Err: err, Pos: cpr.ps.Pos} 55 err = fset.Error(pe) 56 if cpr.opt.VerboseError { 57 err = fmt.Errorf("%w\n%s", err, cpr.Debug(cp)) 58 } 59 return nil, cpr, err 60 } 61 d := newBuildNodeData(cpr, cpn) 62 return d, cpr, nil 63 } 64 65 //---------- 66 67 func (cp *ContentParser) parse3(cpr *cpRun) (*CPNode, error) { 68 // add initial state to stack 69 cpn0 := newCPNode(cpr.ps.Pos, cpr.ps.Pos, nil) 70 item0 := &cpsItem{st: cp.sd.states[0], cpn: cpn0} 71 cpr.stk = cpStack{item0} 72 cpr.logf("%v\n", cpr.stk) 73 // first input (action rule) 74 prule, err := cp.nextParseRule(cpr, item0.st) 75 if err != nil { 76 return nil, err 77 } 78 // run forever 79 for { 80 item := cpr.stk[len(cpr.stk)-1] // stack top 81 82 as := item.st.action[prule] 83 // TODO: deal with this error at statesdata build time? 84 if len(as) != 1 { 85 return nil, fmt.Errorf("expected one action for %v, got %v (st=%v)", prule, as, item.st.id) 86 } 87 a := as[0] 88 89 switch t := a.(type) { 90 case *ActionShift: 91 prule, err = cp.shift(cpr, t) 92 if err != nil { 93 return nil, err 94 } 95 case *ActionReduce: 96 if err := cp.reduce(cpr, t); err != nil { 97 return nil, err 98 } 99 case *ActionAccept: 100 // handle earlystop (nodes with errors) 101 if item.cpn.simulated { 102 return nil, cpr.earlyStop.err 103 } 104 105 return item.cpn, nil 106 default: 107 return nil, goutil.TodoError() 108 } 109 } 110 } 111 func (cp *ContentParser) shift(cpr *cpRun, t *ActionShift) (Rule, error) { 112 // correct simulated node position 113 cpn := cpr.ps.Node.(*CPNode) 114 if cpn.simulated { 115 i := cpr.stk.topEnd() 116 cpn.SetPos(i, i) 117 } 118 119 cpr.logf("shift %v\n", t.st.id) 120 item := &cpsItem{st: t.st, cpn: cpn} 121 cpr.stk = append(cpr.stk, item) 122 cpr.logf("%v\n", cpr.stk) 123 124 if err := cp.buildNode(cpr, cpn.rule, cpn); err != nil { 125 return nil, err 126 } 127 128 // next input 129 return cp.nextParseRule(cpr, t.st) 130 } 131 func (cp *ContentParser) reduce(cpr *cpRun, ar *ActionReduce) error { 132 if cpr.isLogging() { // performance 133 cpr.logf("reduce to %v (pop %v)\n", ar.prod.id(), ar.popN) 134 } 135 136 // pop n items 137 popPos := len(cpr.stk) - ar.popN 138 pops := cpr.stk[popPos:] 139 cpr.stk = cpr.stk[:popPos] // pop 140 141 // use current stk top to find the rule transition 142 item3 := cpr.stk[len(cpr.stk)-1] // top of stack 143 st2, ok := item3.st.gotoSt[ar.prod] 144 if !ok { 145 return fmt.Errorf("no goto for rule %v in %v ", ar.prod.id(), item3.st.id) 146 } 147 cpn, err := cp.groupPopped(cpr, ar, pops) 148 if err != nil { 149 return err 150 } 151 item4 := &cpsItem{st: st2, cpn: cpn} 152 cpr.stk = append(cpr.stk, item4) // push "goto" to stk 153 cpr.logf("%v\n", cpr.stk) 154 155 return cp.buildNode(cpr, ar.prod, cpn) 156 } 157 158 //---------- 159 160 func (cp *ContentParser) buildNode(cpr *cpRun, r Rule, cpn *CPNode) error { 161 if cpn.simulated { 162 return nil 163 } 164 fn, ok := cp.buildNodeFns[r] 165 if !ok { 166 return nil 167 } 168 d := newBuildNodeData(cpr, cpn) 169 return fn(d) 170 } 171 172 //---------- 173 174 func (cp *ContentParser) groupPopped(cpr *cpRun, ar *ActionReduce, pops []*cpsItem) (*CPNode, error) { 175 cpn := cp.groupPopped2(cpr, ar, pops) 176 cp.propagateSimulatedAndRecover(cpr, ar, cpn) 177 return cpn, nil 178 } 179 func (cp *ContentParser) groupPopped2(cpr *cpRun, ar *ActionReduce, pops []*cpsItem) *CPNode { 180 if len(pops) == 0 { // handle no pops reductions (nil rules) 181 i := cpr.stk.topEnd() 182 cpn := newCPNode(i, i, ar.prod) 183 return cpn 184 } else { 185 // group popped items nodes into one node 186 w := make([]*CPNode, 0, len(pops)) 187 for _, item2 := range pops { 188 w = append(w, item2.cpn) 189 } 190 cpn := newCPNode2(w[0], w[len(w)-1], ar.prod) 191 isReverse := cp.Opt.Reverse && ruleProdCanReverse(ar.prod) 192 cpn.addChilds(isReverse, w...) 193 return cpn 194 } 195 } 196 func (cp *ContentParser) propagateSimulatedAndRecover(cpr *cpRun, ar *ActionReduce, cpn *CPNode) { 197 simulatedChilds := false 198 for _, cpn2 := range cpn.childs { 199 if cpn2.simulated { 200 simulatedChilds = true 201 break 202 } 203 } 204 if !simulatedChilds { 205 return 206 } 207 208 // attempt to recover simulated childs 209 if dr, ok := cpn.rule.(*DefRule); ok { 210 if dr.isPOptional { 211 cpn.childs = nil 212 cpn.SetPos(cpn.Pos(), cpn.Pos()) // clear end (as if empty) 213 cpr.logf("recovered: optional\n") 214 return 215 } 216 if dr.isPZeroOrMore { 217 *cpn = *cpn.childs[0] 218 cpr.logf("recovered: pZeroOrMore\n") 219 return 220 } 221 if dr.isPOneOrMore { 222 if !cpn.childs[0].PosEmpty() { 223 cpn.childs = cpn.childs[0].childs 224 cpr.logf("recovered: pOneOrMore\n") 225 return 226 } 227 } 228 } 229 230 // simulated 231 cpn.simulated = true 232 cpn.childs = nil 233 cpn.SetPos(cpn.Pos(), cpn.Pos()) // clear end (as if empty) 234 } 235 236 //---------- 237 238 func (cp *ContentParser) nextParseRule(cpr *cpRun, st *State) (Rule, error) { 239 cpr.logf("rset: %v\n", st.rsetSorted) 240 241 if cpr.earlyStop.on { 242 return cp.simulateParseRuleSet(cpr, st) 243 } 244 245 r, err := cp.parseRuleSet(cpr, st.rsetSorted) 246 if err == nil { 247 return r, nil 248 } 249 250 // allow input to not be fully consumed 251 if cp.Opt.EarlyStop { 252 cpr.logf("earlystop: %v\n", err) 253 cpr.earlyStop.on = true 254 cpr.earlyStop.err = &PosError{Err: err, Pos: cpr.ps.Pos} 255 return cp.simulateParseRuleSet(cpr, st) 256 } 257 258 return nil, err 259 } 260 261 //---------- 262 263 func (cp *ContentParser) simulateParseRuleSet(cpr *cpRun, st *State) (Rule, error) { 264 // rule to simulate 265 r := (Rule)(nil) 266 if st.rsetHasEndRule { // performance: faster stop (not necessary) 267 r = endRule 268 } else { 269 if len(st.rsetSorted) == 0 { 270 return nil, fmt.Errorf("empty rset to simulate") 271 } 272 273 // get index to try next 274 k := cpr.earlyStop.simStateRsetIter[st] % len(st.rsetSorted) 275 cpr.earlyStop.simStateRsetIter[st]++ 276 maxIter := 20 277 if cpr.earlyStop.simStateRsetIter[st] >= maxIter { 278 return nil, fmt.Errorf("reached max simulated attempts: %v; %w", maxIter, cpr.earlyStop.err) 279 } 280 281 r = st.rsetSorted[k] 282 } 283 284 i := cpr.stk.topEnd() 285 cpn := newCPNode(i, i, r) 286 cpn.simulated = true 287 cpr.ps.Node = cpn 288 if cpr.isLogging() { // performance 289 cpr.logf("simulate parseruleset: %v %v\n", r.id(), PNodePosStr(cpn)) 290 } 291 292 return r, nil 293 } 294 295 //---------- 296 297 // creates a cpnode in ps 298 func (cp *ContentParser) parseRuleSet(cpr *cpRun, rset []Rule) (Rule, error) { 299 for _, r := range rset { 300 if err := cp.parseRule(cpr.ps, r); err != nil { 301 continue 302 } 303 if cpr.isLogging() { // performance 304 cpr.logf("parseruleset: %v %v\n", r.id(), PNodePosStr(cpr.ps.Node)) 305 } 306 return r, nil 307 } 308 return nil, fmt.Errorf("failed to parse next: %v", rset) 309 } 310 311 func (cp *ContentParser) parseRule(ps *PState, r Rule) error { 312 switch t := r.(type) { 313 case *StringRule: 314 pos0 := ps.Pos 315 if err := t.parse(ps); err != nil { 316 return err 317 } 318 ps.Node = newCPNode(pos0, ps.Pos, t) 319 case *FuncRule: 320 pos0 := ps.KeepPos() 321 if err := t.fn(ps); err != nil { 322 pos0.Restore() 323 return err 324 } 325 ps.Node = newCPNode(pos0.Pos, ps.Pos, t) 326 case *SingletonRule: 327 switch t { 328 //case nilRule: // commented: not called to be parsed 329 case endRule: 330 if !ps.M.Eof() { 331 return fmt.Errorf("not eof") 332 } 333 ps.Node = newCPNode(ps.Pos, ps.Pos, t) 334 default: 335 panic(goutil.TodoErrorStr(t.name)) 336 } 337 default: 338 panic(goutil.TodoErrorType(t)) 339 } 340 return nil 341 } 342 343 //---------- 344 345 func (cp *ContentParser) SetBuildNodeFn(name string, buildFn BuildNodeFn) error { 346 r, ok := cp.vd.rFirst.ri.get(name) 347 if !ok { 348 return fmt.Errorf("rule name not found: %v", name) 349 } 350 cp.buildNodeFns[r] = buildFn 351 return nil 352 } 353 354 //---------- 355 //---------- 356 //---------- 357 358 // content parser options 359 type CpOpt struct { 360 StartRule string // can be empty, will try to get it from grammar 361 VerboseError bool 362 EarlyStop bool // artificially parses an endrule when nextparsedrule fails. Allows parsing to stop successfully when no more input is recognized (although there is still input), while the rules are still able to reduce correctly. 363 ShiftOnSRConflict bool 364 Reverse bool // runs input/rules in reverse (useful to backtrack in the middle of big inputs to then parse normally) 365 } 366 367 //---------- 368 //---------- 369 //---------- 370 371 type cpRun struct { 372 opt *CpOpt 373 ps *PState 374 stk cpStack 375 earlyStop struct { 376 on bool 377 err error 378 simStateRsetIter map[*State]int // iterate over state rset rules to avoid repeating simulated 379 } 380 logBuf bytes.Buffer 381 externalData any 382 } 383 384 func newCPRun(opt *CpOpt, ps *PState) *cpRun { 385 cpr := &cpRun{opt: opt, ps: ps} 386 cpr.earlyStop.simStateRsetIter = map[*State]int{} 387 return cpr 388 } 389 func (cpr *cpRun) isLogging() bool { 390 return cpr.opt.VerboseError 391 } 392 func (cpr *cpRun) logf(f string, args ...any) { 393 if cpr.isLogging() { 394 fmt.Fprintf(&cpr.logBuf, f, args...) 395 } 396 } 397 func (cpr *cpRun) Debug(cp *ContentParser) string { 398 return fmt.Sprintf("%s\n%s\n%s\n%s%s", 399 cp.vd.rFirst.ri, 400 cp.vd.rFirst, 401 cp.vd, 402 cp.sd, 403 bytes.TrimSpace(cpr.logBuf.Bytes()), 404 ) 405 } 406 407 //---------- 408 //---------- 409 //---------- 410 411 // content parser stack 412 type cpStack []*cpsItem 413 414 func (stk cpStack) topEnd() int { 415 k := len(stk) - 1 416 return stk[k].cpn.End() 417 } 418 419 //godebug:annotateoff 420 func (stk cpStack) String() string { 421 u := []string{} 422 for _, item := range stk { 423 s := fmt.Sprintf("%v:", item.st.id) 424 if item.cpn != nil { // can be nil in state0 425 if item.cpn.rule != nil { // can be nil in state0 426 s += fmt.Sprintf(" %v", item.cpn.rule.id()) 427 } 428 s += " " + PNodePosStr(item.cpn) 429 if item.cpn.simulated { 430 s += fmt.Sprintf(" (simulated)") 431 } 432 } 433 u = append(u, s) 434 } 435 return fmt.Sprintf("stk{\n\t%v\n}", strings.Join(u, "\n\t")) 436 } 437 438 //---------- 439 440 // content parser stack item 441 type cpsItem struct { 442 st *State 443 cpn *CPNode 444 //simulated bool // TODO: move cpn.simulated here 445 } 446 447 //---------- 448 //---------- 449 //---------- 450 451 func indentStr(t string, u string) string { 452 u = strings.TrimRight(u, "\n") 453 u = t + strings.ReplaceAll(u, "\n", "\n"+t) + "\n" 454 return u 455 }