github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/automaton/automaton_test.go (about)

     1  package automaton
     2  
     3  import (
     4  	"container/list"
     5  	"github.com/balzaczyy/golucene/core/util"
     6  	. "github.com/balzaczyy/golucene/test_framework/util"
     7  	// "fmt"
     8  	"math/rand"
     9  	"testing"
    10  	"unicode"
    11  )
    12  
    13  func TestRegExpToAutomaton(t *testing.T) {
    14  	a := NewRegExp("[^ \t\r\n]+").ToAutomaton()
    15  	assert(a.deterministic)
    16  	assert(-1 == a.curState)
    17  	assert(2 == a.numStates())
    18  }
    19  
    20  func TestMinusSimple(t *testing.T) {
    21  	assert(sameLanguage(makeChar('b'), minus(makeCharRange('a', 'b'), makeChar('a'))))
    22  	assert(sameLanguage(MakeEmpty(), minus(makeChar('a'), makeChar('a'))))
    23  }
    24  
    25  func TestComplementSimple(t *testing.T) {
    26  	a := makeChar('a')
    27  	assert(sameLanguage(a, complement(complement(a))))
    28  }
    29  
    30  func TestDeterminizeSimple(t *testing.T) {
    31  	a1 := complement(NewRegExpWithFlag("-", NONE).ToAutomaton())
    32  	a2 := NewRegExpWithFlag("ݖ|+", NONE).ToAutomaton()
    33  	a := concatenate(a1, a2)
    34  	a = removeDeadStates(a)
    35  	a = determinize(a)
    36  	assert(a.numStates() == 4)
    37  }
    38  
    39  // func TestStringUnion(t testing.T) {
    40  // strings := make([]string, 0, 500)
    41  // for i := NextInt(Random(), 0, 1000); i >= 0; i-- {
    42  // 	strings = append(strings, RandomUnicodeString(Random()))
    43  // }
    44  
    45  // sort.Strings(strings)
    46  // union := makeStringUnion(strings)
    47  // assert(union.isDeterministic())
    48  // assert(sameLanguage(union, naiveUnion(strings)))
    49  // }
    50  
    51  // util/automaton/AutomatonTestUtil.java
    52  /*
    53  Utilities for testing automata.
    54  
    55  Capable of generating random regular expressions, and automata, and
    56  also provides a number of very basic unoptimized implementations
    57  (*slow) for testing.
    58  */
    59  
    60  // Returns random string, including full unicode range.
    61  func randomRegexp(r *rand.Rand) string {
    62  	for i := 0; i < 500; i++ {
    63  		regexp := randomRegexpString(r)
    64  		// we will also generate some undefined unicode queries
    65  		if !util.IsValidUTF16String([]rune(regexp)) {
    66  			continue
    67  		}
    68  		if ok := func(regexp string) (ok bool) {
    69  			ok = true
    70  			defer func() {
    71  				if r := recover(); r != nil {
    72  					// log.Println("Recovered:", r)
    73  					ok = false
    74  				}
    75  			}()
    76  			// log.Println("Trying", regexp)
    77  			NewRegExpWithFlag(regexp, NONE)
    78  			return
    79  		}(regexp); ok {
    80  			// fmt.Println("Valid regexp found:", regexp)
    81  			return regexp
    82  		}
    83  	}
    84  	panic("should not be here")
    85  }
    86  
    87  func randomRegexpString(r *rand.Rand) string {
    88  	end := r.Intn(20)
    89  	if end == 0 {
    90  		// allow 0 length
    91  		return ""
    92  	}
    93  	buffer := make([]rune, 0, end)
    94  	for i := 0; i < end; i++ {
    95  		t := r.Intn(15)
    96  		if 0 == t && i < end-1 {
    97  			// Make a surrogate pair
    98  			// High surrogate
    99  			buffer = append(buffer, rune(NextInt(r, 0xd800, 0xdbff)))
   100  			i++
   101  			// Low surrogate
   102  			buffer = append(buffer, rune(NextInt(r, 0xdc00, 0xdfff)))
   103  		} else if t <= 1 {
   104  			buffer = append(buffer, rune(r.Intn(0x80)))
   105  		} else {
   106  			switch t {
   107  			case 2:
   108  				buffer = append(buffer, rune(NextInt(r, 0x80, 0x800)))
   109  			case 3:
   110  				buffer = append(buffer, rune(NextInt(r, 0x800, 0xd7ff)))
   111  			case 4:
   112  				buffer = append(buffer, rune(NextInt(r, 0xe000, 0xffff)))
   113  			case 5:
   114  				buffer = append(buffer, '.')
   115  			case 6:
   116  				buffer = append(buffer, '?')
   117  			case 7:
   118  				buffer = append(buffer, '*')
   119  			case 8:
   120  				buffer = append(buffer, '+')
   121  			case 9:
   122  				buffer = append(buffer, '(')
   123  			case 10:
   124  				buffer = append(buffer, ')')
   125  			case 11:
   126  				buffer = append(buffer, '-')
   127  			case 12:
   128  				buffer = append(buffer, '[')
   129  			case 13:
   130  				buffer = append(buffer, ']')
   131  			case 14:
   132  				buffer = append(buffer, '|')
   133  			}
   134  		}
   135  	}
   136  	return string(buffer)
   137  }
   138  
   139  // L267
   140  // Return a random NFA/DFA for testing
   141  func randomAutomaton(r *rand.Rand) *Automaton {
   142  	// get two random Automata from regexps
   143  	a1 := NewRegExpWithFlag(randomRegexp(r), NONE).ToAutomaton()
   144  	if r.Intn(2) == 0 {
   145  		a1 = complement(a1)
   146  	}
   147  
   148  	a2 := NewRegExpWithFlag(randomRegexp(r), NONE).ToAutomaton()
   149  	if r.Intn(2) == 0 {
   150  		a2 = complement(a2)
   151  	}
   152  
   153  	// combine them in random ways
   154  	switch r.Intn(4) {
   155  	case 0:
   156  		// fmt.Println("DEBUG way 0")
   157  		return concatenate(a1, a2)
   158  	case 1:
   159  		// fmt.Println("DEBUG way 1")
   160  		return union(a1, a2)
   161  	case 2:
   162  		// fmt.Println("DEBUG way 2")
   163  		return intersection(a1, a2)
   164  	default:
   165  		// fmt.Println("DEBUG way 3")
   166  		return minus(a1, a2)
   167  	}
   168  }
   169  
   170  /**
   171   * below are original, unoptimized implementations of DFA operations for testing.
   172   * These are from brics automaton, full license (BSD) below:
   173   */
   174  
   175  /*
   176   * dk.brics.automaton
   177   *
   178   * Copyright (c) 2001-2009 Anders Moeller
   179   * All rights reserved.
   180   *
   181   * Redistribution and use in source and binary forms, with or without
   182   * modification, are permitted provided that the following conditions
   183   * are met:
   184   * 1. Redistributions of source code must retain the above copyright
   185   *    notice, this list of conditions and the following disclaimer.
   186   * 2. Redistributions in binary form must reproduce the above copyright
   187   *    notice, this list of conditions and the following disclaimer in the
   188   *    documentation and/or other materials provided with the distribution.
   189   * 3. The name of the author may not be used to endorse or promote products
   190   *    derived from this software without specific prior written permission.
   191   *
   192   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   193   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   194   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   195   * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   196   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   197   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   198   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   199   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   200   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   201   * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   202   */
   203  
   204  /**
   205   * Simple, original brics implementation of Brzozowski minimize()
   206   */
   207  func minimizeSimple(a *Automaton) *Automaton {
   208  	var initialSet map[int]bool
   209  	a, initialSet = reverse(a)
   210  	a = determinizeSimple(a, initialSet)
   211  	a, initialSet = reverse(a)
   212  	a = determinizeSimple(a, initialSet)
   213  	return a
   214  }
   215  
   216  /*
   217  Simple original brics implementation of determinize()
   218  Determinizes the given automaton using the given set of initial states.
   219  */
   220  func determinizeSimple(a *Automaton, initialset map[int]bool) *Automaton {
   221  	if a.numStates() == 0 {
   222  		return a
   223  	}
   224  	points := a.startPoints()
   225  	// subset construction
   226  	sets := make(map[string]bool)
   227  	hash := func(sets map[int]bool) string {
   228  		n := util.NewOpenBitSet()
   229  		for k, _ := range sets {
   230  			n.Set(int64(k))
   231  		}
   232  		return n.String()
   233  	}
   234  	worklist := list.New()
   235  	newstate := make(map[string]int)
   236  	sets[hash(initialset)] = true
   237  	worklist.PushBack(initialset)
   238  	b := newAutomatonBuilder()
   239  	b.createState()
   240  	newstate[hash(initialset)] = 0
   241  	t := newTransition()
   242  	for worklist.Len() > 0 {
   243  		s := worklist.Remove(worklist.Front()).(map[int]bool)
   244  		r := newstate[hash(s)]
   245  		for q, _ := range s {
   246  			if a.IsAccept(q) {
   247  				b.setAccept(r, true)
   248  				break
   249  			}
   250  		}
   251  		for n, point := range points {
   252  			p := make(map[int]bool)
   253  			for q, _ := range s {
   254  				count := a.initTransition(q, t)
   255  				for i := 0; i < count; i++ {
   256  					a.nextTransition(t)
   257  					if t.min <= point && point <= t.max {
   258  						p[t.dest] = true
   259  					}
   260  				}
   261  			}
   262  
   263  			hashKey := hash(p)
   264  			if _, ok := sets[hashKey]; !ok {
   265  				sets[hashKey] = true
   266  				worklist.PushBack(p)
   267  				newstate[hashKey] = b.createState()
   268  			}
   269  			q := newstate[hashKey]
   270  			min := point
   271  			var max int
   272  			if n+1 < len(points) {
   273  				max = points[n+1] - 1
   274  			} else {
   275  				max = unicode.MaxRune
   276  			}
   277  			b.addTransitionRange(r, q, min, max)
   278  		}
   279  	}
   280  
   281  	return removeDeadStates(b.finish())
   282  }