golang.org/x/text@v0.14.0/search/pattern.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package search 6 7 import ( 8 "golang.org/x/text/internal/colltab" 9 ) 10 11 // TODO: handle variable primary weights? 12 13 func (p *Pattern) deleteEmptyElements() { 14 k := 0 15 for _, e := range p.ce { 16 if !isIgnorable(p.m, e) { 17 p.ce[k] = e 18 k++ 19 } 20 } 21 p.ce = p.ce[:k] 22 } 23 24 func isIgnorable(m *Matcher, e colltab.Elem) bool { 25 if e.Primary() > 0 { 26 return false 27 } 28 if e.Secondary() > 0 { 29 if !m.ignoreDiacritics { 30 return false 31 } 32 // Primary value is 0 and ignoreDiacritics is true. In this case we 33 // ignore the tertiary element, as it only pertains to the modifier. 34 return true 35 } 36 // TODO: further distinguish once we have the new implementation. 37 if !(m.ignoreWidth || m.ignoreCase) && e.Tertiary() > 0 { 38 return false 39 } 40 // TODO: we ignore the Quaternary level for now. 41 return true 42 } 43 44 // TODO: Use a Boyer-Moore-like algorithm (probably Sunday) for searching. 45 46 func (p *Pattern) forwardSearch(it *colltab.Iter) (start, end int) { 47 for start := 0; it.Next(); it.Reset(start) { 48 nextStart := it.End() 49 if end := p.searchOnce(it); end != -1 { 50 return start, end 51 } 52 start = nextStart 53 } 54 return -1, -1 55 } 56 57 func (p *Pattern) anchoredForwardSearch(it *colltab.Iter) (start, end int) { 58 if it.Next() { 59 if end := p.searchOnce(it); end != -1 { 60 return 0, end 61 } 62 } 63 return -1, -1 64 } 65 66 // next advances to the next weight in a pattern. f must return one of the 67 // weights of a collation element. next will advance to the first non-zero 68 // weight and return this weight and true if it exists, or 0, false otherwise. 69 func (p *Pattern) next(i *int, f func(colltab.Elem) int) (weight int, ok bool) { 70 for *i < len(p.ce) { 71 v := f(p.ce[*i]) 72 *i++ 73 if v != 0 { 74 // Skip successive ignorable values. 75 for ; *i < len(p.ce) && f(p.ce[*i]) == 0; *i++ { 76 } 77 return v, true 78 } 79 } 80 return 0, false 81 } 82 83 // TODO: remove this function once Elem is internal and Tertiary returns int. 84 func tertiary(e colltab.Elem) int { 85 return int(e.Tertiary()) 86 } 87 88 // searchOnce tries to match the pattern s.p at the text position i. s.buf needs 89 // to be filled with collation elements of the first segment, where n is the 90 // number of source bytes consumed for this segment. It will return the end 91 // position of the match or -1. 92 func (p *Pattern) searchOnce(it *colltab.Iter) (end int) { 93 var pLevel [4]int 94 95 m := p.m 96 for { 97 k := 0 98 for ; k < it.N; k++ { 99 if v := it.Elems[k].Primary(); v > 0 { 100 if w, ok := p.next(&pLevel[0], colltab.Elem.Primary); !ok || v != w { 101 return -1 102 } 103 } 104 105 if !m.ignoreDiacritics { 106 if v := it.Elems[k].Secondary(); v > 0 { 107 if w, ok := p.next(&pLevel[1], colltab.Elem.Secondary); !ok || v != w { 108 return -1 109 } 110 } 111 } else if it.Elems[k].Primary() == 0 { 112 // We ignore tertiary values of collation elements of the 113 // secondary level. 114 continue 115 } 116 117 // TODO: distinguish between case and width. This will be easier to 118 // implement after we moved to the new collation implementation. 119 if !m.ignoreWidth && !m.ignoreCase { 120 if v := it.Elems[k].Tertiary(); v > 0 { 121 if w, ok := p.next(&pLevel[2], tertiary); !ok || int(v) != w { 122 return -1 123 } 124 } 125 } 126 // TODO: check quaternary weight 127 } 128 it.Discard() // Remove the current segment from the buffer. 129 130 // Check for completion. 131 switch { 132 // If any of these cases match, we are not at the end. 133 case pLevel[0] < len(p.ce): 134 case !m.ignoreDiacritics && pLevel[1] < len(p.ce): 135 case !(m.ignoreWidth || m.ignoreCase) && pLevel[2] < len(p.ce): 136 default: 137 // At this point, both the segment and pattern has matched fully. 138 // However, the segment may still be have trailing modifiers. 139 // This can be verified by another call to next. 140 end = it.End() 141 if it.Next() && it.Elems[0].Primary() == 0 { 142 if !m.ignoreDiacritics { 143 return -1 144 } 145 end = it.End() 146 } 147 return end 148 } 149 150 // Fill the buffer with the next batch of collation elements. 151 if !it.Next() { 152 return -1 153 } 154 } 155 }