github.com/go-xe2/third@v1.0.3/golang.org/x/text/internal/colltab/trie.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // The trie in this file is used to associate the first full character in an 6 // UTF-8 string to a collation element. All but the last byte in a UTF-8 byte 7 // sequence are used to lookup offsets in the index table to be used for the 8 // next byte. The last byte is used to index into a table of collation elements. 9 // For a full description, see go.text/collate/build/trie.go. 10 11 package colltab 12 13 const blockSize = 64 14 15 type Trie struct { 16 Index0 []uint16 // index for first byte (0xC0-0xFF) 17 Values0 []uint32 // index for first byte (0x00-0x7F) 18 Index []uint16 19 Values []uint32 20 } 21 22 const ( 23 t1 = 0x00 // 0000 0000 24 tx = 0x80 // 1000 0000 25 t2 = 0xC0 // 1100 0000 26 t3 = 0xE0 // 1110 0000 27 t4 = 0xF0 // 1111 0000 28 t5 = 0xF8 // 1111 1000 29 t6 = 0xFC // 1111 1100 30 te = 0xFE // 1111 1110 31 ) 32 33 func (t *Trie) lookupValue(n uint16, b byte) Elem { 34 return Elem(t.Values[int(n)<<6+int(b)]) 35 } 36 37 // lookup returns the trie value for the first UTF-8 encoding in s and 38 // the width in bytes of this encoding. The size will be 0 if s does not 39 // hold enough bytes to complete the encoding. len(s) must be greater than 0. 40 func (t *Trie) lookup(s []byte) (v Elem, sz int) { 41 c0 := s[0] 42 switch { 43 case c0 < tx: 44 return Elem(t.Values0[c0]), 1 45 case c0 < t2: 46 return 0, 1 47 case c0 < t3: 48 if len(s) < 2 { 49 return 0, 0 50 } 51 i := t.Index0[c0] 52 c1 := s[1] 53 if c1 < tx || t2 <= c1 { 54 return 0, 1 55 } 56 return t.lookupValue(i, c1), 2 57 case c0 < t4: 58 if len(s) < 3 { 59 return 0, 0 60 } 61 i := t.Index0[c0] 62 c1 := s[1] 63 if c1 < tx || t2 <= c1 { 64 return 0, 1 65 } 66 o := int(i)<<6 + int(c1) 67 i = t.Index[o] 68 c2 := s[2] 69 if c2 < tx || t2 <= c2 { 70 return 0, 2 71 } 72 return t.lookupValue(i, c2), 3 73 case c0 < t5: 74 if len(s) < 4 { 75 return 0, 0 76 } 77 i := t.Index0[c0] 78 c1 := s[1] 79 if c1 < tx || t2 <= c1 { 80 return 0, 1 81 } 82 o := int(i)<<6 + int(c1) 83 i = t.Index[o] 84 c2 := s[2] 85 if c2 < tx || t2 <= c2 { 86 return 0, 2 87 } 88 o = int(i)<<6 + int(c2) 89 i = t.Index[o] 90 c3 := s[3] 91 if c3 < tx || t2 <= c3 { 92 return 0, 3 93 } 94 return t.lookupValue(i, c3), 4 95 } 96 // Illegal rune 97 return 0, 1 98 } 99 100 // The body of lookupString is a verbatim copy of that of lookup. 101 func (t *Trie) lookupString(s string) (v Elem, sz int) { 102 c0 := s[0] 103 switch { 104 case c0 < tx: 105 return Elem(t.Values0[c0]), 1 106 case c0 < t2: 107 return 0, 1 108 case c0 < t3: 109 if len(s) < 2 { 110 return 0, 0 111 } 112 i := t.Index0[c0] 113 c1 := s[1] 114 if c1 < tx || t2 <= c1 { 115 return 0, 1 116 } 117 return t.lookupValue(i, c1), 2 118 case c0 < t4: 119 if len(s) < 3 { 120 return 0, 0 121 } 122 i := t.Index0[c0] 123 c1 := s[1] 124 if c1 < tx || t2 <= c1 { 125 return 0, 1 126 } 127 o := int(i)<<6 + int(c1) 128 i = t.Index[o] 129 c2 := s[2] 130 if c2 < tx || t2 <= c2 { 131 return 0, 2 132 } 133 return t.lookupValue(i, c2), 3 134 case c0 < t5: 135 if len(s) < 4 { 136 return 0, 0 137 } 138 i := t.Index0[c0] 139 c1 := s[1] 140 if c1 < tx || t2 <= c1 { 141 return 0, 1 142 } 143 o := int(i)<<6 + int(c1) 144 i = t.Index[o] 145 c2 := s[2] 146 if c2 < tx || t2 <= c2 { 147 return 0, 2 148 } 149 o = int(i)<<6 + int(c2) 150 i = t.Index[o] 151 c3 := s[3] 152 if c3 < tx || t2 <= c3 { 153 return 0, 3 154 } 155 return t.lookupValue(i, c3), 4 156 } 157 // Illegal rune 158 return 0, 1 159 }