github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/automaton/daciukMihov.go (about)

     1  package automaton
     2  
     3  // util/automaton/DaciukMihovAutomatonBuilder.java
     4  
     5  type DaciukMihovAutomatonBuilder struct {
     6  }
     7  
     8  // DFSA state with rune labels on transition
     9  type dfsaState struct {
    10  }
    11  
    12  /*
    13  Add another rune sequence to this automaton. The sequence must be
    14  lexicographically larger or equal compared to any previous sequences
    15  added to this automaton (the input must be sorted)
    16  */
    17  func (builder *DaciukMihovAutomatonBuilder) add(current []rune) {
    18  	panic("niy")
    19  	// assert2(builder.stateRegistry != nil, "Automaton already builder.")
    20  	// assert2(builder.previous == nil || builder.previous <= cur)
    21  }
    22  
    23  /*
    24  Finalize the automaton and return the root state. No more strings can
    25  be added to the builder after this call.
    26  */
    27  func (builder *DaciukMihovAutomatonBuilder) complete() *dfsaState {
    28  	panic("not implemented yet")
    29  }
    30  
    31  // Internal recursive traversal for conversion.
    32  func convert(a *AutomatonBuilder, s *dfsaState, visited map[*dfsaState]int) int {
    33  	panic("not implemented yet")
    34  }
    35  
    36  /*
    37  Build a minimal, deterministic automaton from a sorted list of []byte
    38  representing strings in UTF-8. These strings must be binary-sorted.
    39  */
    40  func buildDaciukMihovAutomaton(input [][]byte) *Automaton {
    41  	// builder := &DaciukMihovAutomatonBuilder{}
    42  	// scratch := make([]rune, 0)
    43  	// for _, b := range input {
    44  	panic("not implemented yet")
    45  	// 	builder.add(scratch)
    46  	// }
    47  
    48  	// a := newEmptyAutomaton()
    49  	// a.initial = convert(
    50  	// 	builder.complete(),
    51  	// 	make(map[*dfsaState]*State))
    52  	// a.deterministic = true
    53  	// return a
    54  }
    55  
// util/CharsRef.java
    57  
    58  func compareUTF16SortedAsUTF8(a, b []rune) int {
    59  	// if a == b {
    60  	// 	return 0
    61  	// }
    62  
    63  	for i, lenA, lenB := 0, len(a), len(b); i < lenA && i < lenB; i++ {
    64  		aChar, bChar := a[i], b[i]
    65  		if aChar != bChar {
    66  			// http://icu-project.org/docs/papers/utf16_code_point_order.html
    67  
    68  			// aChar != bChar, fix up each one if they're both in or above
    69  			// the surrogate range, then compare them
    70  			if aChar >= 0xd800 && bChar >= 0xd800 {
    71  				if aChar >= 0xe000 {
    72  					aChar -= 0x800
    73  				} else {
    74  					aChar += 0x2000
    75  				}
    76  
    77  				if bChar >= 0xe000 {
    78  					bChar -= 0x800
    79  				} else {
    80  					bChar += 0x2000
    81  				}
    82  			}
    83  
    84  			// now aChar and bChar are in code point order
    85  			return int(aChar) - int(bChar)
    86  		}
    87  	}
    88  
    89  	// One is a prefix of the other, or, they are equal:
    90  	return len(a) - len(b)
    91  }