github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/automaton/daciukMihov.go (about) 1 package automaton 2 3 // util/automaton/DaciukMihovAutomatonBuilder.java 4 5 type DaciukMihovAutomatonBuilder struct { 6 } 7 8 // DFSA state with rune labels on transition 9 type dfsaState struct { 10 } 11 12 /* 13 Add another rune sequence to this automaton. The sequence must be 14 lexicographically larger or equal compared to any previous sequences 15 added to this automaton (the input must be sorted) 16 */ 17 func (builder *DaciukMihovAutomatonBuilder) add(current []rune) { 18 panic("niy") 19 // assert2(builder.stateRegistry != nil, "Automaton already builder.") 20 // assert2(builder.previous == nil || builder.previous <= cur) 21 } 22 23 /* 24 Finalize the automaton and return the root state. No more strings can 25 be added to the builder after this call. 26 */ 27 func (builder *DaciukMihovAutomatonBuilder) complete() *dfsaState { 28 panic("not implemented yet") 29 } 30 31 // Internal recursive traversal for conversion. 32 func convert(a *AutomatonBuilder, s *dfsaState, visited map[*dfsaState]int) int { 33 panic("not implemented yet") 34 } 35 36 /* 37 Build a minimal, deterministic automaton from a sorted list of []byte 38 representing strings in UTF-8. These strings must be binary-sorted. 39 */ 40 func buildDaciukMihovAutomaton(input [][]byte) *Automaton { 41 // builder := &DaciukMihovAutomatonBuilder{} 42 // scratch := make([]rune, 0) 43 // for _, b := range input { 44 panic("not implemented yet") 45 // builder.add(scratch) 46 // } 47 48 // a := newEmptyAutomaton() 49 // a.initial = convert( 50 // builder.complete(), 51 // make(map[*dfsaState]*State)) 52 // a.deterministic = true 53 // return a 54 } 55 56 // utils/CharsRef.java 57 58 func compareUTF16SortedAsUTF8(a, b []rune) int { 59 // if a == b { 60 // return 0 61 // } 62 63 for i, lenA, lenB := 0, len(a), len(b); i < lenA && i < lenB; i++ { 64 aChar, bChar := a[i], b[i] 65 if aChar != bChar { 66 // http://icu-project.org/docs/papers/utf16_code_point_order.html 67 68 // aChar != bChar, fix up each one if they're both in or above 69 // the surrogate range, then compare them 70 if aChar >= 0xd800 && bChar >= 0xd800 { 71 if aChar >= 0xe000 { 72 aChar -= 0x800 73 } else { 74 aChar += 0x2000 75 } 76 77 if bChar >= 0xe000 { 78 bChar -= 0x800 79 } else { 80 bChar += 0x2000 81 } 82 } 83 84 // now aChar and bChar are in code point order 85 return int(aChar) - int(bChar) 86 } 87 } 88 89 // One is a prefix of the other, or, they are equal: 90 return len(a) - len(b) 91 }