// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// ContractTrieSet is a flat table of trie entries that the scanners below
// walk one input byte at a time. As read by scan, the fields of an entry are:
//
//   - L: the byte an input byte is compared against; for an entry whose N is
//     final it is also the lower bound of a byte range.
//   - H: for an entry whose N is final, the inclusive upper bound of the byte
//     range; otherwise an offset that, added to the size of the current block
//     of entries, locates the next block.
//   - N: the number of entries in the next block, or final.
//   - I: the index reported for a match, or noIndex for none. For a range
//     match the reported index is I plus the distance of the byte from L.
//
// For a description of ContractTrieSet, see text/collate/build/contract.go.
type ContractTrieSet []struct{ L, H, N, I uint8 }

// ctScanner matches a trie against a byte slice.
//
// A contraction may match a non-contiguous sequence of bytes in the input.
// For example, a contraction for <a, combining_ring> should also match
// <a, combining_cedilla, combining_ring>, because combining_cedilla does not
// block combining_ring. The scanner does not skip non-blocking non-starters
// by itself: it keeps the state of the last match so that the caller can
// resume matching at the appropriate position.
type ctScanner struct {
	states ContractTrieSet // entries of the current state block and everything after it
	s      []byte          // input being matched
	n      int             // number of leading entries of states that scan examines
	index  int             // index recorded by the most recent match
	pindex int             // input position just past the most recent match
	done   bool            // set once a final entry has matched
}

// ctScannerString is the string-input twin of ctScanner.
type ctScannerString struct {
	states ContractTrieSet // entries of the current state block and everything after it
	s      string          // input being matched
	n      int             // number of leading entries of states that scan examines
	index  int             // index recorded by the most recent match
	pindex int             // input position just past the most recent match
	done   bool            // set once a final entry has matched
}

// scanner returns a ctScanner over b whose initial state block consists of
// the n entries of t starting at index.
func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
	sc := ctScanner{states: t[index:], n: n}
	sc.s = b
	return sc
}

// scannerString returns a ctScannerString over str whose initial state block
// consists of the n entries of t starting at index.
func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
	sc := ctScannerString{states: t[index:], n: n}
	sc.s = str
	return sc
}

// result reports the index i recorded by the last match and the input
// position p just past it. Both are 0 if nothing has matched yet.
func (s *ctScanner) result() (i, p int) {
	i, p = s.index, s.pindex
	return i, p
}

// result reports the index i recorded by the last match and the input
// position p just past it. Both are 0 if nothing has matched yet.
func (s *ctScannerString) result() (i, p int) {
	i, p = s.index, s.pindex
	return i, p
}

const (
	// final in the N field marks an entry that has no next state block.
	final = 0
	// noIndex in the I field marks an entry that records no index on a match.
	noIndex = 0xFF
)

// scan matches the longest suffix starting at position p of the input and
// returns the position up to which input was consumed. When matching stops
// part-way through a rune, the position of the last rune boundary reached
// (or the original p) is returned instead, and the saved state is the one
// that was current at that boundary.
func (s *ctScanner) scan(p int) int {
	input := s.s
	trie, width := s.states, s.n
	boundary := p // most recent position known to sit at the start of a rune

	pos := 0
	for pos < width && p < len(input) {
		entry, b := trie[pos], input[p]
		// TODO: a significant number of contractions are of a form that
		// cannot match discontiguous UTF-8 in a normalized string. A negative
		// value of e.n could signal that s.done can be set right away,
		// avoiding the need for additional matches.
		switch {
		case b == entry.L:
			// Exact byte match: consume it and record the index, if any.
			p++
			if entry.I != noIndex {
				s.index, s.pindex = int(entry.I), p
			}
			if entry.N == final {
				s.done = true
				return p
			}
			// Descend into the next state block. The offset is relative to
			// the end of the current block, so the old width is used here.
			trie, width, pos = trie[int(entry.H)+width:], int(entry.N), 0
			// Only commit the new state when the next byte starts a rune
			// (or the input is exhausted), so a resumed scan never starts
			// in the middle of a rune.
			if p >= len(input) || utf8.RuneStart(input[p]) {
				s.states, s.n, boundary = trie, width, p
			}
		case b > entry.L && entry.N == final && b <= entry.H:
			// Range match on a final entry: the index is offset by the
			// distance of the byte from the lower bound.
			p++
			s.done = true
			s.index = int(entry.I) + int(b-entry.L)
			s.pindex = p
			return p
		default:
			pos++
		}
	}
	return boundary
}

// scan is the string counterpart of ctScanner.scan; the two bodies are kept
// identical apart from the input type.
func (s *ctScannerString) scan(p int) int {
	input := s.s
	trie, width := s.states, s.n
	boundary := p // most recent position known to sit at the start of a rune

	pos := 0
	for pos < width && p < len(input) {
		entry, b := trie[pos], input[p]
		// TODO: a significant number of contractions are of a form that
		// cannot match discontiguous UTF-8 in a normalized string. A negative
		// value of e.n could signal that s.done can be set right away,
		// avoiding the need for additional matches.
		switch {
		case b == entry.L:
			// Exact byte match: consume it and record the index, if any.
			p++
			if entry.I != noIndex {
				s.index, s.pindex = int(entry.I), p
			}
			if entry.N == final {
				s.done = true
				return p
			}
			// Descend into the next state block. The offset is relative to
			// the end of the current block, so the old width is used here.
			trie, width, pos = trie[int(entry.H)+width:], int(entry.N), 0
			// Only commit the new state when the next byte starts a rune
			// (or the input is exhausted), so a resumed scan never starts
			// in the middle of a rune.
			if p >= len(input) || utf8.RuneStart(input[p]) {
				s.states, s.n, boundary = trie, width, p
			}
		case b > entry.L && entry.N == final && b <= entry.H:
			// Range match on a final entry: the index is offset by the
			// distance of the byte from the lower bound.
			p++
			s.done = true
			s.index = int(entry.I) + int(b-entry.L)
			s.pindex = p
			return p
		default:
			pos++
		}
	}
	return boundary
}