github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/automaton/automaton_test.go (about) 1 package automaton 2 3 import ( 4 "container/list" 5 "github.com/balzaczyy/golucene/core/util" 6 . "github.com/balzaczyy/golucene/test_framework/util" 7 // "fmt" 8 "math/rand" 9 "testing" 10 "unicode" 11 ) 12 13 func TestRegExpToAutomaton(t *testing.T) { 14 a := NewRegExp("[^ \t\r\n]+").ToAutomaton() 15 assert(a.deterministic) 16 assert(-1 == a.curState) 17 assert(2 == a.numStates()) 18 } 19 20 func TestMinusSimple(t *testing.T) { 21 assert(sameLanguage(makeChar('b'), minus(makeCharRange('a', 'b'), makeChar('a')))) 22 assert(sameLanguage(MakeEmpty(), minus(makeChar('a'), makeChar('a')))) 23 } 24 25 func TestComplementSimple(t *testing.T) { 26 a := makeChar('a') 27 assert(sameLanguage(a, complement(complement(a)))) 28 } 29 30 func TestDeterminizeSimple(t *testing.T) { 31 a1 := complement(NewRegExpWithFlag("-", NONE).ToAutomaton()) 32 a2 := NewRegExpWithFlag("ݖ|+", NONE).ToAutomaton() 33 a := concatenate(a1, a2) 34 a = removeDeadStates(a) 35 a = determinize(a) 36 assert(a.numStates() == 4) 37 } 38 39 // func TestStringUnion(t testing.T) { 40 // strings := make([]string, 0, 500) 41 // for i := NextInt(Random(), 0, 1000); i >= 0; i-- { 42 // strings = append(strings, RandomUnicodeString(Random())) 43 // } 44 45 // sort.Strings(strings) 46 // union := makeStringUnion(strings) 47 // assert(union.isDeterministic()) 48 // assert(sameLanguage(union, naiveUnion(strings))) 49 // } 50 51 // util/automaton/AutomatonTestUtil.java 52 /* 53 Utilities for testing automata. 54 55 Capable of generating random regular expressions, and automata, and 56 also provides a number of very basic unoptimized implementations 57 (*slow) for testing. 58 */ 59 60 // Returns random string, including full unicode range. 61 func randomRegexp(r *rand.Rand) string { 62 for i := 0; i < 500; i++ { 63 regexp := randomRegexpString(r) 64 // we will also generate some undefined unicode queries 65 if !util.IsValidUTF16String([]rune(regexp)) { 66 continue 67 } 68 if ok := func(regexp string) (ok bool) { 69 ok = true 70 defer func() { 71 if r := recover(); r != nil { 72 // log.Println("Recovered:", r) 73 ok = false 74 } 75 }() 76 // log.Println("Trying", regexp) 77 NewRegExpWithFlag(regexp, NONE) 78 return 79 }(regexp); ok { 80 // fmt.Println("Valid regexp found:", regexp) 81 return regexp 82 } 83 } 84 panic("should not be here") 85 } 86 87 func randomRegexpString(r *rand.Rand) string { 88 end := r.Intn(20) 89 if end == 0 { 90 // allow 0 length 91 return "" 92 } 93 buffer := make([]rune, 0, end) 94 for i := 0; i < end; i++ { 95 t := r.Intn(15) 96 if 0 == t && i < end-1 { 97 // Make a surrogate pair 98 // High surrogate 99 buffer = append(buffer, rune(NextInt(r, 0xd800, 0xdbff))) 100 i++ 101 // Low surrogate 102 buffer = append(buffer, rune(NextInt(r, 0xdc00, 0xdfff))) 103 } else if t <= 1 { 104 buffer = append(buffer, rune(r.Intn(0x80))) 105 } else { 106 switch t { 107 case 2: 108 buffer = append(buffer, rune(NextInt(r, 0x80, 0x800))) 109 case 3: 110 buffer = append(buffer, rune(NextInt(r, 0x800, 0xd7ff))) 111 case 4: 112 buffer = append(buffer, rune(NextInt(r, 0xe000, 0xffff))) 113 case 5: 114 buffer = append(buffer, '.') 115 case 6: 116 buffer = append(buffer, '?') 117 case 7: 118 buffer = append(buffer, '*') 119 case 8: 120 buffer = append(buffer, '+') 121 case 9: 122 buffer = append(buffer, '(') 123 case 10: 124 buffer = append(buffer, ')') 125 case 11: 126 buffer = append(buffer, '-') 127 case 12: 128 buffer = append(buffer, '[') 129 case 13: 130 buffer = append(buffer, ']') 131 case 14: 132 buffer = append(buffer, '|') 133 } 134 } 135 } 136 return string(buffer) 137 } 138 139 // L267 140 // Return a random NFA/DFA for testing 141 func randomAutomaton(r *rand.Rand) *Automaton { 142 // get two random Automata from regexps 143 a1 := NewRegExpWithFlag(randomRegexp(r), NONE).ToAutomaton() 144 if r.Intn(2) == 0 { 145 a1 = complement(a1) 146 } 147 148 a2 := NewRegExpWithFlag(randomRegexp(r), NONE).ToAutomaton() 149 if r.Intn(2) == 0 { 150 a2 = complement(a2) 151 } 152 153 // combine them in random ways 154 switch r.Intn(4) { 155 case 0: 156 // fmt.Println("DEBUG way 0") 157 return concatenate(a1, a2) 158 case 1: 159 // fmt.Println("DEBUG way 1") 160 return union(a1, a2) 161 case 2: 162 // fmt.Println("DEBUG way 2") 163 return intersection(a1, a2) 164 default: 165 // fmt.Println("DEBUG way 3") 166 return minus(a1, a2) 167 } 168 } 169 170 /** 171 * below are original, unoptimized implementations of DFA operations for testing. 172 * These are from brics automaton, full license (BSD) below: 173 */ 174 175 /* 176 * dk.brics.automaton 177 * 178 * Copyright (c) 2001-2009 Anders Moeller 179 * All rights reserved. 180 * 181 * Redistribution and use in source and binary forms, with or without 182 * modification, are permitted provided that the following conditions 183 * are met: 184 * 1. Redistributions of source code must retain the above copyright 185 * notice, this list of conditions and the following disclaimer. 186 * 2. Redistributions in binary form must reproduce the above copyright 187 * notice, this list of conditions and the following disclaimer in the 188 * documentation and/or other materials provided with the distribution. 189 * 3. The name of the author may not be used to endorse or promote products 190 * derived from this software without specific prior written permission. 191 * 192 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 193 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 194 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 195 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 196 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 197 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 198 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 199 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 200 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 201 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 202 */ 203 204 /** 205 * Simple, original brics implementation of Brzozowski minimize() 206 */ 207 func minimizeSimple(a *Automaton) *Automaton { 208 var initialSet map[int]bool 209 a, initialSet = reverse(a) 210 a = determinizeSimple(a, initialSet) 211 a, initialSet = reverse(a) 212 a = determinizeSimple(a, initialSet) 213 return a 214 } 215 216 /* 217 Simple original brics implementation of determinize() 218 Determinizes the given automaton using the given set of initial states. 219 */ 220 func determinizeSimple(a *Automaton, initialset map[int]bool) *Automaton { 221 if a.numStates() == 0 { 222 return a 223 } 224 points := a.startPoints() 225 // subset construction 226 sets := make(map[string]bool) 227 hash := func(sets map[int]bool) string { 228 n := util.NewOpenBitSet() 229 for k, _ := range sets { 230 n.Set(int64(k)) 231 } 232 return n.String() 233 } 234 worklist := list.New() 235 newstate := make(map[string]int) 236 sets[hash(initialset)] = true 237 worklist.PushBack(initialset) 238 b := newAutomatonBuilder() 239 b.createState() 240 newstate[hash(initialset)] = 0 241 t := newTransition() 242 for worklist.Len() > 0 { 243 s := worklist.Remove(worklist.Front()).(map[int]bool) 244 r := newstate[hash(s)] 245 for q, _ := range s { 246 if a.IsAccept(q) { 247 b.setAccept(r, true) 248 break 249 } 250 } 251 for n, point := range points { 252 p := make(map[int]bool) 253 for q, _ := range s { 254 count := a.initTransition(q, t) 255 for i := 0; i < count; i++ { 256 a.nextTransition(t) 257 if t.min <= point && point <= t.max { 258 p[t.dest] = true 259 } 260 } 261 } 262 263 hashKey := hash(p) 264 if _, ok := sets[hashKey]; !ok { 265 sets[hashKey] = true 266 worklist.PushBack(p) 267 newstate[hashKey] = b.createState() 268 } 269 q := newstate[hashKey] 270 min := point 271 var max int 272 if n+1 < len(points) { 273 max = points[n+1] - 1 274 } else { 275 max = unicode.MaxRune 276 } 277 b.addTransitionRange(r, q, min, max) 278 } 279 } 280 281 return removeDeadStates(b.finish()) 282 }