github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/automaton/operations.go (about) 1 package automaton 2 3 import ( 4 "container/list" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/util" 7 "unicode" 8 ) 9 10 // Basic automata operations. 11 12 /* 13 Returns an automaton that accepts the concatenation of the 14 languages of the given automata. 15 16 Complexity: linear in total number of states. 17 */ 18 func concatenate(a1, a2 *Automaton) *Automaton { 19 return concatenateN([]*Automaton{a1, a2}) 20 } 21 22 /* 23 Returns an automaton that accepts the concatenation of the 24 languages of the given automata. 25 26 Complexity: linear in total number of states. 27 */ 28 func concatenateN(l []*Automaton) *Automaton { 29 ans := newEmptyAutomaton() 30 31 // first pass: create all states 32 for _, a := range l { 33 if a.numStates() == 0 { 34 ans.finishState() 35 return ans 36 } 37 numStates := a.numStates() 38 for s := 0; s < numStates; s++ { 39 ans.createState() 40 } 41 } 42 43 // second pass: add transitions, carefully linking accept 44 // states of A to init state of next A: 45 stateOffset := 0 46 t := newTransition() 47 for i, a := range l { 48 numStates := a.numStates() 49 50 var nextA *Automaton 51 if i < len(l)-1 { 52 nextA = l[i+1] 53 } 54 55 for s := 0; s < numStates; s++ { 56 numTransitions := a.initTransition(s, t) 57 for j := 0; j < numTransitions; j++ { 58 a.nextTransition(t) 59 ans.addTransitionRange(stateOffset+s, stateOffset+t.dest, t.min, t.max) 60 } 61 62 if a.IsAccept(s) { 63 followA := nextA 64 followOffset := stateOffset 65 upto := i + 1 66 for { 67 if followA != nil { 68 // adds a "virtual" epsilon transition: 69 numTransitions = followA.initTransition(0, t) 70 for j := 0; j < numTransitions; j++ { 71 followA.nextTransition(t) 72 ans.addTransitionRange(stateOffset+s, followOffset+numStates+t.dest, t.min, t.max) 73 } 74 if followA.IsAccept(0) { 75 // keep chaning if followA accepts empty string 76 followOffset += followA.numStates() 77 if upto < len(l)-1 { 78 followA = l[upto+1] 79 } else { 80 followA = nil 81 } 82 upto++ 83 } else { 84 break 85 } 86 } else { 87 ans.setAccept(stateOffset+s, true) 88 break 89 } 90 } 91 } 92 } 93 94 stateOffset += numStates 95 } 96 97 if ans.numStates() == 0 { 98 ans.createState() 99 } 100 101 ans.finishState() 102 return ans 103 } 104 105 /* 106 Returns an automaton that accepts the union of the empty string and 107 the language of the given automaton. 108 109 Complexity: linear in number of states. 110 */ 111 func optional(a *Automaton) *Automaton { 112 ans := newEmptyAutomaton() 113 ans.createState() 114 ans.setAccept(0, true) 115 if a.numStates() > 0 { 116 ans.copy(a) 117 ans.addEpsilon(0, 1) 118 } 119 ans.finishState() 120 return ans 121 } 122 123 /* 124 Returns an automaton that accepts the Kleene star (zero or more 125 concatenated repetitions) of the language of the given automaton. 126 Never modifies the input automaton language. 127 128 Complexity: linear in number of states. 129 */ 130 func repeat(a *Automaton) *Automaton { 131 if isEmpty(a) { 132 return a 133 } 134 135 b := newAutomatonBuilder() 136 b.createState() 137 b.setAccept(0, true) 138 b.copy(a) 139 140 t := newTransition() 141 count := a.initTransition(0, t) 142 for i := 0; i < count; i++ { 143 a.nextTransition(t) 144 b.addTransitionRange(0, t.dest+1, t.min, t.max) 145 } 146 147 numStates := a.numStates() 148 for s := 0; s < numStates; s++ { 149 if a.IsAccept(s) { 150 count = a.initTransition(0, t) 151 for i := 0; i < count; i++ { 152 a.nextTransition(t) 153 b.addTransitionRange(s+1, t.dest+1, t.min, t.max) 154 } 155 } 156 } 157 158 return b.finish() 159 } 160 161 /* 162 Returns an automaton that accepts min or more concatenated 163 repetitions of the language of the given automaton. 164 165 Complexity: linear in number of states and in min. 166 */ 167 func repeatMin(a *Automaton, min int) *Automaton { 168 if min == 0 { 169 return repeat(a) 170 } 171 as := make([]*Automaton, 0, min+1) 172 for min > 0 { 173 as = append(as, a) 174 min-- 175 } 176 as = append(as, repeat(a)) 177 return concatenateN(as) 178 } 179 180 /* 181 Returns a (deterministic) automaton that accepts the complement of 182 the language of the given automaton. 183 184 Complexity: linear in number of states (if already deterministic). 185 */ 186 func complement(a *Automaton) *Automaton { 187 a = totalize(determinize(a)) 188 numStates := a.numStates() 189 for p := 0; p < numStates; p++ { 190 a.setAccept(p, !a.IsAccept(p)) 191 } 192 return removeDeadStates(a) 193 } 194 195 /* 196 Returns a (deterministic) automaton that accepts the intersection of 197 the language of a1 and the complement of the language of a2. As a 198 side-effect, the automata may be determinized, if not already 199 deterministic. 200 201 Complexity: quadratic in number of states (if already deterministic). 202 */ 203 func minus(a1, a2 *Automaton) *Automaton { 204 if isEmpty(a1) || a1 == a2 { 205 return MakeEmpty() 206 } 207 if isEmpty(a2) { 208 return a1 209 } 210 return intersection(a1, complement(a2)) 211 } 212 213 // Pair of states. 214 type StatePair struct{ s, s1, s2 int } 215 216 /* 217 Returns an automaton that accepts the intersection of the languages 218 of the given automata. Never modifies the input automata languages. 219 220 Complexity: quadratic in number of states. 221 */ 222 func intersection(a1, a2 *Automaton) *Automaton { 223 if a1 == a2 || a1.numStates() == 0 { 224 return a1 225 } 226 if a2.numStates() == 0 { 227 return a2 228 } 229 230 transitions1 := a1.sortedTransitions() 231 transitions2 := a2.sortedTransitions() 232 c := newEmptyAutomaton() 233 c.createState() 234 worklist := list.New() 235 newstates := make(map[string]*StatePair) 236 hash := func(p *StatePair) string { 237 return fmt.Sprintf("%v/%v", p.s1, p.s2) 238 } 239 p := &StatePair{0, 0, 0} 240 worklist.PushBack(p) 241 newstates[hash(p)] = p 242 for worklist.Len() > 0 { 243 p = worklist.Remove(worklist.Front()).(*StatePair) 244 c.setAccept(p.s, a1.IsAccept(p.s1) && a2.IsAccept(p.s2)) 245 t1 := transitions1[p.s1] 246 t2 := transitions2[p.s2] 247 for n1, b2 := 0, 0; n1 < len(t1); n1++ { 248 for b2 < len(t2) && t2[b2].max < t1[n1].min { 249 b2++ 250 } 251 for n2 := b2; n2 < len(t2) && t1[n1].max >= t2[n2].min; n2++ { 252 if t2[n2].max >= t1[n1].min { 253 q := &StatePair{-1, t1[n1].dest, t2[n2].dest} 254 r, ok := newstates[hash(q)] 255 if !ok { 256 q.s = c.createState() 257 worklist.PushBack(q) 258 newstates[hash(q)] = q 259 r = q 260 } 261 min := or(t1[n1].min > t2[n2].min, t1[n1].min, t2[n2].min).(int) 262 max := or(t1[n1].max < t2[n2].max, t1[n1].max, t2[n2].max).(int) 263 c.addTransitionRange(p.s, r.s, min, max) 264 } 265 } 266 } 267 } 268 c.finishState() 269 return removeDeadStates(c) 270 } 271 272 /* 273 Returns true if these two automata accept exactly the same language. 274 This is a costly computation! Note also that a1 and a2 will be 275 determinized as a side effect. 276 */ 277 func sameLanguage(a1, a2 *Automaton) bool { 278 if a1 == a2 { 279 return true 280 } 281 return subsetOf(a2, a1) && subsetOf(a1, a2) 282 } 283 284 /* 285 Returns true if the automaton has any states that cannot be reached 286 from the initial state or cannot reach an accept state. 287 Cost is O(numTransitions+numStates). 288 */ 289 func hasDeadStates(a *Automaton) bool { 290 liveStates := liveStates(a) 291 numLive := liveStates.Cardinality() 292 numStates := a.numStates() 293 assert2(numLive <= int64(numStates), "numLive=%v numStates=%v %v", numLive, numStates, liveStates) 294 return numLive < int64(numStates) 295 } 296 297 func hasDeadStatesFromInitial(a *Automaton) bool { 298 r1 := liveStatesFromInitial(a) 299 r2 := liveStatesToAccept(a) 300 r1.AndNot(r2) 301 return !r1.IsEmpty() 302 } 303 304 /* 305 Returns true if the language of a1 is a subset of the language of a2. 306 As a side-effect, a2 is determinized if not already marked as 307 deterministic. 308 */ 309 func subsetOf(a1, a2 *Automaton) bool { 310 assert2(a1.deterministic, "a1 must be deterministic") 311 assert2(a2.deterministic, "a2 must be deterministic") 312 assert(!hasDeadStatesFromInitial(a1)) 313 assert2(!hasDeadStatesFromInitial(a2), "%v", a2) 314 if a1.numStates() == 0 { 315 // empty language is always a subset of any other language 316 return true 317 } else if a2.numStates() == 0 { 318 return isEmpty(a1) 319 } 320 321 transitions1 := a1.sortedTransitions() 322 transitions2 := a2.sortedTransitions() 323 worklist := list.New() 324 visited := make(map[string]*StatePair) 325 hash := func(p *StatePair) string { 326 return fmt.Sprintf("%v/%v", p.s1, p.s2) 327 } 328 p := &StatePair{-1, 0, 0} 329 worklist.PushBack(p) 330 visited[hash(p)] = p 331 for worklist.Len() > 0 { 332 p = worklist.Remove(worklist.Front()).(*StatePair) 333 if a1.IsAccept(p.s1) && !a2.IsAccept(p.s2) { 334 return false 335 } 336 t1 := transitions1[p.s1] 337 t2 := transitions2[p.s2] 338 for n1, b2, t1Len := 0, 0, len(t1); n1 < t1Len; n1++ { 339 t2Len := len(t2) 340 for b2 < t2Len && t2[b2].max < t1[n1].min { 341 b2++ 342 } 343 min1, max1 := t1[n1].min, t1[n1].max 344 345 for n2 := b2; n2 < t2Len && t1[n1].max >= t2[n2].min; n2++ { 346 if t2[n2].min > min1 { 347 return false 348 } 349 if t2[n2].max < unicode.MaxRune { 350 min1 = t2[n2].max + 1 351 } else { 352 min1, max1 = unicode.MaxRune, MIN_CODE_POINT 353 } 354 q := &StatePair{-1, t1[n1].dest, t2[n2].dest} 355 if _, ok := visited[hash(q)]; !ok { 356 worklist.PushBack(q) 357 visited[hash(q)] = q 358 } 359 } 360 if min1 <= max1 { 361 return false 362 } 363 } 364 } 365 return true 366 } 367 368 /* 369 Returns an automaton that accepts the union of the languages of the 370 given automta. 371 372 Complexity: linear in number of states. 373 */ 374 func union(a1, a2 *Automaton) *Automaton { 375 return unionN([]*Automaton{a1, a2}) 376 } 377 378 /* 379 Returns an automaton that accepts the union of the languages of the 380 given automata. 381 382 Complexity: linear in number of states. 383 */ 384 func unionN(l []*Automaton) *Automaton { 385 ans := newEmptyAutomaton() 386 // create initial state 387 ans.createState() 388 // copy over all automata 389 for _, a := range l { 390 ans.copy(a) 391 } 392 // add epsilon transition from new initial state 393 stateOffset := 1 394 for _, a := range l { 395 if a.numStates() == 0 { 396 continue 397 } 398 ans.addEpsilon(0, stateOffset) 399 stateOffset += a.numStates() 400 } 401 ans.finishState() 402 return removeDeadStates(ans) 403 } 404 405 /* Simple custom []*Transition */ 406 type TransitionList struct { 407 transitions []int // dest,min,max 408 } 409 410 func (l *TransitionList) add(t *Transition) { 411 l.transitions = append(l.transitions, t.dest, t.min, t.max) 412 } 413 414 // Holds all transitions that start on this int point, or end at this 415 // point-1 416 type PointTransitions struct { 417 point int 418 ends *TransitionList 419 starts *TransitionList 420 } 421 422 func newPointTransitions() *PointTransitions { 423 return &PointTransitions{ 424 ends: new(TransitionList), 425 starts: new(TransitionList), 426 } 427 } 428 429 func (pt *PointTransitions) reset(point int) { 430 pt.point = point 431 pt.ends.transitions = pt.ends.transitions[:0] 432 pt.starts.transitions = pt.starts.transitions[:0] 433 } 434 435 const HASHMAP_CUTOVER = 30 436 437 type PointTransitionSet struct { 438 points []*PointTransitions 439 dict map[int]*PointTransitions 440 useHash bool 441 } 442 443 func newPointTransitionSet() *PointTransitionSet { 444 return &PointTransitionSet{ 445 points: make([]*PointTransitions, 0, 5), 446 dict: make(map[int]*PointTransitions), 447 useHash: false, 448 } 449 } 450 451 func (pts *PointTransitionSet) next(point int) *PointTransitions { 452 // 1st time we are seeing this point 453 p := newPointTransitions() 454 pts.points = append(pts.points, p) 455 p.reset(point) 456 return p 457 } 458 459 func (pts *PointTransitionSet) find(point int) *PointTransitions { 460 if pts.useHash { 461 p, ok := pts.dict[point] 462 if !ok { 463 p = pts.next(point) 464 pts.dict[point] = p 465 } 466 return p 467 } 468 469 for _, p := range pts.points { 470 if p.point == point { 471 return p 472 } 473 } 474 475 p := pts.next(point) 476 if len(pts.points) == HASHMAP_CUTOVER { 477 // switch to hash map on the fly 478 assert(len(pts.dict) == 0) 479 for _, v := range pts.points { 480 pts.dict[v.point] = v 481 } 482 pts.useHash = true 483 } 484 return p 485 } 486 487 func (pts *PointTransitionSet) reset() { 488 if pts.useHash { 489 pts.dict = make(map[int]*PointTransitions) 490 pts.useHash = false 491 } 492 pts.points = pts.points[:0] // reuse slice 493 } 494 495 type PointTransitionsArray []*PointTransitions 496 497 func (a PointTransitionsArray) Len() int { return len(a) } 498 func (a PointTransitionsArray) Less(i, j int) bool { return a[i].point < a[j].point } 499 func (a PointTransitionsArray) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 500 501 func (pts *PointTransitionSet) sort() { 502 // Tim sort performs well on already sorted arrays: 503 if len(pts.points) > 0 { 504 util.TimSort(PointTransitionsArray(pts.points)) 505 } 506 } 507 508 func (pts *PointTransitionSet) add(t *Transition) { 509 pts.find(t.min).starts.add(t) 510 pts.find(1 + t.max).ends.add(t) 511 } 512 513 func (pts *PointTransitionSet) String() string { 514 panic("not implemented yet") 515 } 516 517 /* 518 Determinizes the given automaton. 519 520 Split the code points in ranges, and merge overlapping states. 521 522 Worst case complexity: exponential in number of states. 523 */ 524 func determinize(a *Automaton) *Automaton { 525 if a.deterministic || a.numStates() <= 1 { 526 return a 527 } 528 529 // subset construction 530 b := newAutomatonBuilder() 531 532 // fmt.Println("DET:") 533 534 initialset := newFrozenIntSetOf(0, 0) 535 536 // craete state 0: 537 b.createState() 538 539 worklist := list.New() 540 newstate := make(map[string]int) 541 hash := func(s *FrozenIntSet) string { 542 return s.String() 543 } 544 545 worklist.PushBack(initialset) 546 547 b.setAccept(0, a.IsAccept(0)) 548 newstate[hash(initialset)] = 0 549 550 // like map[int]*PointTransitions 551 points := newPointTransitionSet() 552 553 // like sorted map[int]int 554 statesSet := newSortedIntSet(5) 555 556 t := newTransition() 557 558 for worklist.Len() > 0 { 559 s := worklist.Remove(worklist.Front()).(*FrozenIntSet) 560 // fmt.Printf("det: pop set=%v\n", s) 561 562 // Collate all outgoing transitions by min/1+max 563 for _, s0 := range s.values { 564 numTransitions := a.numTransitions(s0) 565 a.initTransition(s0, t) 566 for j := 0; j < numTransitions; j++ { 567 a.nextTransition(t) 568 points.add(t) 569 } 570 } 571 572 if len(points.points) == 0 { 573 // No outgoing transitions -- skip it 574 continue 575 } 576 577 points.sort() 578 579 lastPoint := -1 580 accCount := 0 581 582 r := s.state 583 for _, v := range points.points { 584 point := v.point 585 586 if len(statesSet.values) > 0 { 587 assert(lastPoint != -1) 588 589 hashKey := statesSet.computeHash().String() 590 591 q, ok := newstate[hashKey] 592 if !ok { 593 q = b.createState() 594 p := statesSet.freeze(q) 595 // fmt.Printf(" make new state=%v -> %v accCount=%v\n", q, p, accCount) 596 worklist.PushBack(p) 597 b.setAccept(q, accCount > 0) 598 newstate[hash(p)] = q 599 } else { 600 assert2(b.isAccept(q) == (accCount > 0), 601 "accCount=%v vs existing accept=%v states=%v", 602 accCount, b.isAccept(q), statesSet) 603 } 604 605 // fmt.Printf(" add trans src=%v dest=%v min=%v max=%v\n", 606 // r, q, lastPoint, point-1) 607 b.addTransitionRange(r, q, lastPoint, point-1) 608 } 609 610 // process transitions that end on this point 611 // (closes an overlapping interval) 612 for j, limit := 0, len(v.ends.transitions); j < limit; j += 3 { 613 dest := v.ends.transitions[j] 614 statesSet.decr(dest) 615 if a.IsAccept(dest) { 616 accCount-- 617 } 618 } 619 v.ends.transitions = v.ends.transitions[:0] // reuse slice 620 621 // process transitions that start on this point 622 // (opens a new interval) 623 for j, limit := 0, len(v.starts.transitions); j < limit; j += 3 { 624 dest := v.starts.transitions[j] 625 statesSet.incr(dest) 626 if a.IsAccept(dest) { 627 accCount++ 628 } 629 } 630 v.starts.transitions = v.starts.transitions[:0] // reuse slice 631 632 lastPoint = point 633 } 634 points.reset() 635 assert2(len(statesSet.values) == 0, "upto=%v", len(statesSet.values)) 636 } 637 638 ans := b.finish() 639 assert(ans.deterministic) 640 return ans 641 } 642 643 // // L779 644 // Returns true if the given automaton accepts no strings. 645 func isEmpty(a *Automaton) bool { 646 if a.numStates() == 0 { 647 // common case: no states 648 return true 649 } 650 if !a.IsAccept(0) && a.numTransitions(0) == 0 { 651 // common case: just one initial state 652 return true 653 } 654 if a.IsAccept(0) { 655 // apparently common case: it accepts the empty string 656 return false 657 } 658 659 workList := list.New() 660 seen := util.NewOpenBitSet() 661 workList.PushBack(0) 662 seen.Set(0) 663 664 t := newTransition() 665 for workList.Len() > 0 { 666 state := workList.Remove(workList.Front()).(int) 667 if a.IsAccept(state) { 668 return false 669 } 670 count := a.initTransition(state, t) 671 for i := 0; i < count; i++ { 672 a.nextTransition(t) 673 if !seen.Get(int64(t.dest)) { 674 workList.PushBack(t.dest) 675 seen.Set(int64(t.dest)) 676 } 677 } 678 } 679 680 return true 681 } 682 683 // /* 684 // Returns true if the given string is accepted by the autmaton. 685 686 // Complexity: linear in the length of the string. 687 688 // Note: for fll performance, use the RunAutomation class. 689 // */ 690 // func run(a *Automaton, s string) bool { 691 // if a.isSingleton() { 692 // return s == a.singleton 693 // } 694 // if a.deterministic { 695 // p := a.initial 696 // for _, ch := range s { 697 // q := p.step(int(ch)) 698 // if q == nil { 699 // return false 700 // } 701 // p = q 702 // } 703 // return p.accept 704 // } 705 // // states := a.NumberedStates() 706 // panic("not implemented yet") 707 // } 708 709 /* 710 Returns the set of live states. A state is "live" if an accept state 711 is reachable from it and if it is reachable from the initial state. 712 */ 713 func liveStates(a *Automaton) *util.OpenBitSet { 714 live := liveStatesFromInitial(a) 715 live.And(liveStatesToAccept(a)) 716 return live 717 } 718 719 /* Returns BitSet marking states reachable from the initial state. */ 720 func liveStatesFromInitial(a *Automaton) *util.OpenBitSet { 721 numStates := a.numStates() 722 live := util.NewOpenBitSet() 723 if numStates == 0 { 724 return live 725 } 726 workList := list.New() 727 live.Set(0) 728 workList.PushBack(0) 729 730 t := newTransition() 731 for workList.Len() > 0 { 732 s := workList.Remove(workList.Front()).(int) 733 count := a.initTransition(s, t) 734 for i := 0; i < count; i++ { 735 a.nextTransition(t) 736 if !live.Get(int64(t.dest)) { 737 live.Set(int64(t.dest)) 738 workList.PushBack(t.dest) 739 } 740 } 741 } 742 743 return live 744 } 745 746 /* Returns BitSet marking states that can reach an accept state. */ 747 func liveStatesToAccept(a *Automaton) *util.OpenBitSet { 748 builder := newAutomatonBuilder() 749 750 // NOTE: not quite the same thing as what SpecialOperations.reverse does: 751 t := newTransition() 752 numStates := a.numStates() 753 for s := 0; s < numStates; s++ { 754 builder.createState() 755 } 756 for s := 0; s < numStates; s++ { 757 count := a.initTransition(s, t) 758 for i := 0; i < count; i++ { 759 a.nextTransition(t) 760 builder.addTransitionRange(t.dest, s, t.min, t.max) 761 } 762 } 763 a2 := builder.finish() 764 765 workList := list.New() 766 live := util.NewOpenBitSet() 767 acceptBits := a.isAccept 768 s := 0 769 for s < numStates { 770 s = int(acceptBits.NextSetBit(int64(s))) 771 if s == -1 { 772 break 773 } 774 live.Set(int64(s)) 775 workList.PushBack(s) 776 s++ 777 } 778 779 for workList.Len() > 0 { 780 s = workList.Remove(workList.Front()).(int) 781 count := a2.initTransition(s, t) 782 for i := 0; i < count; i++ { 783 a2.nextTransition(t) 784 if !live.Get(int64(t.dest)) { 785 live.Set(int64(t.dest)) 786 workList.PushBack(t.dest) 787 } 788 } 789 } 790 791 return live 792 } 793 794 /* 795 Removes transitions to dead states (a state is "dead" if it is not 796 reachable from the initial state or no accept state is reachable from 797 it.) 798 */ 799 func removeDeadStates(a *Automaton) *Automaton { 800 numStates := a.numStates() 801 liveSet := liveStates(a) 802 803 m := make([]int, numStates) 804 805 ans := newEmptyAutomaton() 806 // fmt.Printf("liveSet: %v numStates=%v\n", liveSet, numStates) 807 for i := 0; i < numStates; i++ { 808 if liveSet.Get(int64(i)) { 809 m[i] = ans.createState() 810 ans.setAccept(m[i], a.IsAccept(i)) 811 } 812 } 813 814 t := newTransition() 815 816 for i := 0; i < numStates; i++ { 817 if liveSet.Get(int64(i)) { 818 numTransitions := a.initTransition(i, t) 819 // filter out transitions to dead states: 820 for j := 0; j < numTransitions; j++ { 821 a.nextTransition(t) 822 if liveSet.Get(int64(t.dest)) { 823 ans.addTransitionRange(m[i], m[t.dest], t.min, t.max) 824 } 825 } 826 } 827 } 828 829 ans.finishState() 830 assert(!hasDeadStates(ans)) 831 return ans 832 } 833 834 /* 835 Finds the largest entry whose value is less than or equal to c, or 836 0 if there is no such entry. 837 */ 838 func findIndex(c int, points []int) int { 839 a, b := 0, len(points) 840 for b-a > 1 { 841 d := int(uint(a+b) >> 1) 842 if points[d] > c { 843 b = d 844 } else if points[d] < c { 845 a = d 846 } else { 847 return d 848 } 849 } 850 return a 851 } 852 853 /* Returns an automaton accepting the reverse language. */ 854 func reverse(a *Automaton) (*Automaton, map[int]bool) { 855 if isEmpty(a) { 856 return newEmptyAutomaton(), nil 857 } 858 859 numStates := a.numStates() 860 861 // build a new automaton with all edges reversed 862 b := newAutomatonBuilder() 863 864 // initial node; we'll add epsilon transitions in the end: 865 b.createState() 866 for s := 0; s < numStates; s++ { 867 b.createState() 868 } 869 870 // old initial state becomes new accept state: 871 b.setAccept(1, true) 872 873 t := newTransition() 874 for s := 0; s < numStates; s++ { 875 numTransitions := a.numTransitions(s) 876 a.initTransition(s, t) 877 for i := 0; i < numTransitions; i++ { 878 a.nextTransition(t) 879 b.addTransitionRange(t.dest+1, s+1, t.min, t.max) 880 } 881 } 882 883 ans := b.finish() 884 initialStates := make(map[int]bool) 885 886 acceptStates := a.isAccept 887 for s := acceptStates.NextSetBit(0); s != -1; s = acceptStates.NextSetBit(s + 1) { 888 ans.addEpsilon(0, int(s+1)) 889 initialStates[int(s+1)] = true 890 } 891 892 ans.finishState() 893 return ans, initialStates 894 } 895 896 /* 897 Returns a new automaton accepting the same language with added 898 transitions to a dead state so that from every state and every label 899 there is a transition. 900 */ 901 func totalize(a *Automaton) *Automaton { 902 ans := newEmptyAutomaton() 903 numStates := a.numStates() 904 for i := 0; i < numStates; i++ { 905 ans.createState() 906 ans.setAccept(i, a.IsAccept(i)) 907 } 908 909 deadState := ans.createState() 910 ans.addTransitionRange(deadState, deadState, MIN_CODE_POINT, unicode.MaxRune) 911 912 t := newTransition() 913 for i := 0; i < numStates; i++ { 914 maxi := MIN_CODE_POINT 915 count := a.initTransition(i, t) 916 for j := 0; j < count; j++ { 917 a.nextTransition(t) 918 ans.addTransitionRange(i, t.dest, t.min, t.max) 919 if t.min > maxi { 920 ans.addTransitionRange(i, deadState, maxi, t.min-1) 921 } 922 if t.max+1 > maxi { 923 maxi = t.max + 1 924 } 925 } 926 927 if maxi <= unicode.MaxRune { 928 ans.addTransitionRange(i, deadState, maxi, unicode.MaxRune) 929 } 930 } 931 932 ans.finishState() 933 return ans 934 }