github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/automaton/automaton.go (about) 1 package automaton 2 3 import ( 4 "fmt" 5 "github.com/balzaczyy/golucene/core/util" 6 "sort" 7 "unicode" 8 ) 9 10 // util/automaton/Automaton.java 11 12 /* 13 Represents an automaton and all its states and transitions. States 14 are integers and must be created using {@link #createState}. Mark a 15 state as an accept state using {@link #setAccept}. Add transitions 16 using {@link #addTransition}. Each state must have all of its 17 transitions added at once; if this is too restrictive then use 18 {@link Automaton.Builder} instead. State 0 is always the 19 initial state. Once a state is finished, either 20 because you've starting adding transitions to another state or you 21 call {@link #finishState}, then that states transitions are sorted 22 (first by min, then max, then dest) and reduced (transitions with 23 adjacent labels going to the same dest are combined). 24 */ 25 type Automaton struct { 26 curState int 27 states []int // 2x 28 transitions []int // 3x 29 isAccept *util.OpenBitSet 30 deterministic bool 31 } 32 33 func newEmptyAutomaton() *Automaton { 34 return &Automaton{ 35 deterministic: true, 36 curState: -1, 37 isAccept: util.NewOpenBitSet(), 38 } 39 } 40 41 func (a *Automaton) String() string { 42 return fmt.Sprintf("{curState=%v,states=%v,transitions=%v,isAccept=%v,%v}", 43 a.curState, a.states, a.transitions, a.isAccept, a.deterministic) 44 } 45 46 /* Create a new state. */ 47 func (a *Automaton) createState() int { 48 state := len(a.states) / 2 49 a.states = append(a.states, -1, 0) 50 return state 51 } 52 53 /* Set or clear this state as an accept state. */ 54 func (a *Automaton) setAccept(state int, accept bool) { 55 assert2(state < a.numStates(), "state=%v is out of bounds (numStates=%v)", state, a.numStates()) 56 if accept { 57 a.isAccept.Set(int64(state)) 58 } else { 59 a.isAccept.Clear(int64(state)) 60 } 61 } 62 63 /* 64 Sugar to get all transitions for all states. This is object-heavy; 65 it's better to iterate state by state instead. 66 */ 67 func (a *Automaton) sortedTransitions() [][]*Transition { 68 numStates := a.numStates() 69 transitions := make([][]*Transition, numStates) 70 for s := 0; s < numStates; s++ { 71 numTransitions := a.numTransitions(s) 72 transitions[s] = make([]*Transition, numTransitions) 73 for t := 0; t < numTransitions; t++ { 74 transition := newTransition() 75 a.transition(s, t, transition) 76 transitions[s][t] = transition 77 } 78 } 79 return transitions 80 } 81 82 /* Returns true if this state is an accept state. */ 83 func (a *Automaton) IsAccept(state int) bool { 84 return a.isAccept.Get(int64(state)) 85 } 86 87 /* Add a new transition with min = max = label. */ 88 func (a *Automaton) addTransition(source, dest, label int) { 89 a.addTransitionRange(source, dest, label, label) 90 } 91 92 /* Add a new transition with the specified source, dest, min, max. */ 93 func (a *Automaton) addTransitionRange(source, dest, min, max int) { 94 assert(len(a.transitions)%3 == 0) 95 assert2(source < a.numStates(), "source=%v is out of bounds (maxState is %v)", source, a.numStates()-1) 96 assert2(dest < a.numStates(), "dest=%v is out of bounds (maxState is %v)", dest, a.numStates()-1) 97 98 if a.curState != source { 99 if a.curState != -1 { 100 a.finishCurrentState() 101 } 102 103 // move to next source: 104 a.curState = source 105 assert2(a.states[2*a.curState] == -1, "from state (%v) already had transitions added", source) 106 assert(a.states[2*a.curState+1] == 0) 107 a.states[2*a.curState] = len(a.transitions) 108 } 109 110 a.transitions = append(a.transitions, dest, min, max) 111 112 // increment transition count for this state 113 a.states[2*a.curState+1]++ 114 } 115 116 /* 117 Add a [virtual] epsilon transition between source and dest. Dest 118 state must already have all transitions added because this method 119 simply copies those same transitions over to source. 120 */ 121 func (a *Automaton) addEpsilon(source, dest int) { 122 t := newTransition() 123 count := a.initTransition(dest, t) 124 for i := 0; i < count; i++ { 125 a.nextTransition(t) 126 a.addTransitionRange(source, t.dest, t.min, t.max) 127 } 128 if a.IsAccept(dest) { 129 a.setAccept(source, true) 130 } 131 } 132 133 /* 134 Copies over all state/transition from other. The state numbers are 135 sequentially assigned (appended). 136 */ 137 func (a *Automaton) copy(other *Automaton) { 138 // bulk copy and then fixup the state pointers 139 stateOffset := a.numStates() 140 a.states = append(a.states, other.states...) 141 for i := 0; i < len(other.states); i += 2 { 142 if a.states[stateOffset*2+i] != -1 { 143 a.states[stateOffset*2+i] += len(a.transitions) 144 } 145 } 146 otherAcceptState := other.isAccept 147 for state := otherAcceptState.NextSetBit(0); state != -1; state = otherAcceptState.NextSetBit(state + 1) { 148 a.setAccept(stateOffset+int(state), true) 149 } 150 151 // bulk copy and then fixup dest for each transition 152 transOffset := len(a.transitions) 153 a.transitions = append(a.transitions, other.transitions...) 154 for i := 0; i < len(other.transitions); i += 3 { 155 a.transitions[transOffset+i] += stateOffset 156 } 157 158 if !other.deterministic { 159 a.deterministic = false 160 } 161 } 162 163 /* Freezes the last state, sorting and reducing the transitions. */ 164 func (a *Automaton) finishCurrentState() { 165 numTransitions := a.states[2*a.curState+1] 166 assert(numTransitions > 0) 167 168 offset := a.states[2*a.curState] 169 start := offset / 3 170 util.NewInPlaceMergeSorter(destMinMaxSorter(a.transitions)).Sort(start, start+numTransitions) 171 172 // reduce any "adjacent" transitions: 173 upto, min, max, dest := 0, -1, -1, -1 174 175 for i := 0; i < numTransitions; i++ { 176 tDest := a.transitions[offset+3*i] 177 tMin := a.transitions[offset+3*i+1] 178 tMax := a.transitions[offset+3*i+2] 179 180 if dest == tDest { 181 if tMin <= max+1 { 182 if tMax > max { 183 max = tMax 184 } 185 } else { 186 if dest != -1 { 187 a.transitions[offset+3*upto] = dest 188 a.transitions[offset+3*upto+1] = min 189 a.transitions[offset+3*upto+2] = max 190 upto++ 191 } 192 min, max = tMin, tMax 193 } 194 } else { 195 if dest != -1 { 196 a.transitions[offset+3*upto] = dest 197 a.transitions[offset+3*upto+1] = min 198 a.transitions[offset+3*upto+2] = max 199 upto++ 200 } 201 dest, min, max = tDest, tMin, tMax 202 } 203 } 204 205 if dest != -1 { 206 // last transition 207 a.transitions[offset+3*upto] = dest 208 a.transitions[offset+3*upto+1] = min 209 a.transitions[offset+3*upto+2] = max 210 upto++ 211 } 212 213 a.transitions = a.transitions[:len(a.transitions)-(numTransitions-upto)*3] 214 a.states[2*a.curState+1] = upto 215 216 // sort transitions by min/max/dest: 217 util.NewInPlaceMergeSorter(minMaxDestSorter(a.transitions)).Sort(start, start+upto) 218 219 if a.deterministic && upto > 1 { 220 lastMax := a.transitions[offset+2] 221 for i := 1; i < upto; i++ { 222 min = a.transitions[offset+3*i+1] 223 if min <= lastMax { 224 a.deterministic = false 225 break 226 } 227 lastMax = a.transitions[offset+3*i+2] 228 } 229 } 230 } 231 232 /* 233 Finishes the current state; call this once you are done adding 234 transitions for a state. This is automatically called if you start 235 adding transitions to a new source state, but for the last state you 236 add, you need to call this method yourself. 237 */ 238 func (a *Automaton) finishState() { 239 if a.curState != -1 { 240 a.finishCurrentState() 241 a.curState = -1 242 } 243 } 244 245 /* How many states this automaton has. */ 246 func (a *Automaton) numStates() int { 247 return len(a.states) / 2 248 } 249 250 /* How many transitions this state has. */ 251 func (a *Automaton) numTransitions(state int) int { 252 if count := a.states[2*state+1]; count != -1 { 253 return count 254 } 255 return 0 256 } 257 258 type destMinMaxSorter []int 259 260 func (s destMinMaxSorter) Len() int { 261 panic("niy") 262 } 263 264 func (s destMinMaxSorter) Swap(i, j int) { 265 iStart, jStart := 3*i, 3*j 266 for n := 0; n < 3; n++ { 267 s[iStart+n], s[jStart+n] = s[jStart+n], s[iStart+n] 268 } 269 } 270 271 func (s destMinMaxSorter) Less(i, j int) bool { 272 iStart := 3 * i 273 jStart := 3 * j 274 275 // first dest: 276 iDest := s[iStart] 277 jDest := s[jStart] 278 if iDest < jDest { 279 return true 280 } else if iDest > jDest { 281 return false 282 } 283 284 // then min: 285 iMin := s[iStart+1] 286 jMin := s[jStart+1] 287 if iMin < jMin { 288 return true 289 } else if iMin > jMin { 290 return false 291 } 292 293 // then max: 294 iMax := s[iStart+2] 295 jMax := s[jStart+2] 296 return iMax < jMax 297 } 298 299 type minMaxDestSorter []int 300 301 func (s minMaxDestSorter) Len() int { 302 panic("niy") 303 } 304 305 func (s minMaxDestSorter) Swap(i, j int) { 306 iStart, jStart := 3*i, 3*j 307 for n := 0; n < 3; n++ { 308 s[iStart+n], s[jStart+n] = s[jStart+n], s[iStart+n] 309 } 310 } 311 312 func (s minMaxDestSorter) Less(i, j int) bool { 313 iStart, jStart := 3*i, 3*j 314 315 iMin, jMin := s[iStart+1], s[jStart+1] 316 if iMin < jMin { 317 return true 318 } else if iMin > jMin { 319 return false 320 } 321 322 iMax, jMax := s[iStart+2], s[jStart+2] 323 if iMax < jMax { 324 return true 325 } else if iMax > jMax { 326 return false 327 } 328 329 iDest, jDest := s[iStart], s[jStart] 330 return iDest < jDest 331 } 332 333 /* 334 Initialize the provided Transition to iterate through all transitions 335 leaving the specified state. You must call nextTransition() to get 336 each transition. Returns the number of transitions leaving this tate. 337 */ 338 func (a *Automaton) initTransition(state int, t *Transition) int { 339 assert2(state < a.numStates(), "state=%v nextState=%v", state, a.numStates()) 340 t.source = state 341 t.transitionUpto = a.states[2*state] 342 return a.numTransitions(state) 343 } 344 345 /* Iterate to the next transition after the provided one */ 346 func (a *Automaton) nextTransition(t *Transition) { 347 // make sure there is still a transition left 348 assert((t.transitionUpto + 3 - a.states[2*t.source]) <= 3*a.states[2*t.source+1]) 349 t.dest = a.transitions[t.transitionUpto] 350 t.min = a.transitions[t.transitionUpto+1] 351 t.max = a.transitions[t.transitionUpto+2] 352 t.transitionUpto += 3 353 } 354 355 /* 356 Fill the provided Transition with the index'th transition leaving the 357 specified state. 358 */ 359 func (a *Automaton) transition(state, index int, t *Transition) { 360 i := a.states[2*state] + 3*index 361 t.source = state 362 t.dest = a.transitions[i] 363 t.min = a.transitions[i+1] 364 t.max = a.transitions[i+2] 365 } 366 367 // L563 368 /* Returns sorted array of all interval start points. */ 369 func (a *Automaton) startPoints() []int { 370 pointset := make(map[int]bool) 371 pointset[MIN_CODE_POINT] = true 372 // fmt.Println("getStartPoints") 373 for s := 0; s < len(a.states); s += 2 { 374 trans := a.states[s] 375 limit := trans + 3*a.states[s+1] 376 // fmt.Printf(" state=%v trans=%v limit=%v\n", s/2, trans, limit) 377 for trans < limit { 378 min, max := a.transitions[trans+1], a.transitions[trans+2] 379 // fmt.Printf(" min=%v\n", min) 380 pointset[min] = true 381 if max < unicode.MaxRune { 382 pointset[max+1] = true 383 } 384 trans += 3 385 } 386 } 387 var points []int 388 for m, _ := range pointset { 389 points = append(points, m) 390 } 391 sort.Ints(points) 392 return points 393 } 394 395 /* Performs lookup in transitions, assuming determinism. */ 396 func (a *Automaton) step(state, label int) int { 397 assert(state >= 0) 398 assert(label >= 0) 399 if 2*state >= len(a.states) { 400 return -1 // invalid state 401 } 402 trans := a.states[2*state] 403 limit := trans + 3*a.states[2*state+1] 404 // TODO binary search 405 for trans < limit { 406 dest, min, max := a.transitions[trans], a.transitions[trans+1], a.transitions[trans+2] 407 if min <= label && label <= max { 408 return dest 409 } 410 trans += 3 411 } 412 return -1 413 } 414 415 // Go doesn't have unicode.MinRune which should be 0 416 const MIN_CODE_POINT = 0 417 418 type AutomatonBuilder struct { 419 transitions []int 420 a *Automaton 421 } 422 423 func newAutomatonBuilder() *AutomatonBuilder { 424 return &AutomatonBuilder{ 425 a: newEmptyAutomaton(), 426 } 427 } 428 429 func (b *AutomatonBuilder) addTransitionRange(source, dest, min, max int) { 430 b.transitions = append(b.transitions, source, dest, min, max) 431 } 432 433 type srcMinMaxDestSorter []int 434 435 func (s srcMinMaxDestSorter) Len() int { 436 panic("niy") 437 } 438 439 func (s srcMinMaxDestSorter) Swap(i, j int) { 440 iStart, jStart := 4*i, 4*j 441 for n := 0; n < 4; n++ { 442 s[iStart+n], s[jStart+n] = s[jStart+n], s[iStart+n] 443 } 444 } 445 446 func (s srcMinMaxDestSorter) Less(i, j int) bool { 447 iStart, jStart := 4*i, 4*j 448 449 iSrc, jSrc := s[iStart], s[jStart] 450 if iSrc < jSrc { 451 return true 452 } else if iSrc > jSrc { 453 return false 454 } 455 456 iMin, jMin := s[iStart+2], s[jStart+2] 457 if iMin < jMin { 458 return true 459 } else if iMin > jMin { 460 return false 461 } 462 463 iMax, jMax := s[iStart+3], s[jStart+3] 464 if iMax < jMax { 465 return true 466 } else if iMax > jMax { 467 return false 468 } 469 470 iDest, jDest := s[iStart+1], s[jStart+1] 471 return iDest < jDest 472 } 473 474 /* Compiles all added states and transitions into a new Automaton and returns it. */ 475 func (b *AutomatonBuilder) finish() *Automaton { 476 // fmt.Printf("LA.Builder.finish: count=%v\n", len(b.transitions)/4) 477 // fmt.Println("finish pending") 478 util.NewInPlaceMergeSorter(srcMinMaxDestSorter(b.transitions)).Sort(0, len(b.transitions)/4) 479 for upto := 0; upto < len(b.transitions); upto += 4 { 480 b.a.addTransitionRange( 481 b.transitions[upto], 482 b.transitions[upto+1], 483 b.transitions[upto+2], 484 b.transitions[upto+3], 485 ) 486 } 487 488 b.a.finishState() 489 return b.a 490 } 491 492 func (b *AutomatonBuilder) createState() int { 493 return b.a.createState() 494 } 495 496 func (b *AutomatonBuilder) setAccept(state int, accept bool) { 497 b.a.setAccept(state, accept) 498 } 499 500 func (b *AutomatonBuilder) isAccept(state int) bool { 501 return b.a.IsAccept(state) 502 } 503 504 func (b *AutomatonBuilder) copy(other *Automaton) { 505 offset := b.a.numStates() 506 otherNumStates := other.numStates() 507 for s := 0; s < otherNumStates; s++ { 508 newState := b.createState() 509 b.setAccept(newState, other.IsAccept(s)) 510 } 511 t := newTransition() 512 for s := 0; s < otherNumStates; s++ { 513 count := other.initTransition(s, t) 514 for i := 0; i < count; i++ { 515 other.nextTransition(t) 516 b.addTransitionRange(offset+s, offset+t.dest, t.min, t.max) 517 } 518 } 519 }