github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/x/text/unicode/norm/forminfo.go (about) 1 // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 2 3 // Copyright 2011 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package norm 8 9 // This file contains Form-specific logic and wrappers for data in tables.go. 10 11 // Rune info is stored in a separate trie per composing form. A composing form 12 // and its corresponding decomposing form share the same trie. Each trie maps 13 // a rune to a uint16. The values take two forms. For v >= 0x8000: 14 // bits 15 // 15: 1 (inverse of NFD_QC bit of qcInfo) 16 // 13..7: qcInfo (see below). isYesD is always true (no decomposition). 17 // 6..0: ccc (compressed CCC value). 18 // For v < 0x8000, the respective rune has a decomposition and v is an index 19 // into a byte array of UTF-8 decomposition sequences and additional info and 20 // has the form: 21 // <header> <decomp_byte>* [<tccc> [<lccc>]] 22 // The header contains the number of bytes in the decomposition (excluding this 23 // length byte). The two most significant bits of this length byte correspond 24 // to bit 5 and 4 of qcInfo (see below). The byte sequence itself starts at v+1. 25 // The byte sequence is followed by a trailing and leading CCC if the values 26 // for these are not zero. The value of v determines which ccc are appended 27 // to the sequences. For v < firstCCC, there are none, for v >= firstCCC, 28 // the sequence is followed by a trailing ccc, and for v >= firstLeadingCCC 29 // there is an additional leading ccc. The value of tccc itself is the 30 // trailing CCC shifted left 2 bits. The two least-significant bits of tccc 31 // are the number of trailing non-starters. 
32 33 const ( 34 qcInfoMask = 0x3F // to clear all but the relevant bits in a qcInfo 35 headerLenMask = 0x3F // extract the length value from the header byte 36 headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte 37 ) 38 39 // Properties provides access to normalization properties of a rune. 40 type Properties struct { 41 pos uint8 // start position in reorderBuffer; used in composition.go 42 size uint8 // length of UTF-8 encoding of this rune 43 ccc uint8 // leading canonical combining class (ccc if not decomposition) 44 tccc uint8 // trailing canonical combining class (ccc if not decomposition) 45 nLead uint8 // number of leading non-starters. 46 flags qcInfo // quick check flags 47 index uint16 48 } 49 50 // functions dispatchable per form 51 type lookupFunc func(b input, i int) Properties 52 53 // formInfo holds Form-specific functions and tables. 54 type formInfo struct { 55 form Form 56 composing, compatibility bool // form type 57 info lookupFunc 58 nextMain iterFunc 59 } 60 61 var formTable = []*formInfo{{ 62 form: NFC, 63 composing: true, 64 compatibility: false, 65 info: lookupInfoNFC, 66 nextMain: nextComposed, 67 }, { 68 form: NFD, 69 composing: false, 70 compatibility: false, 71 info: lookupInfoNFC, 72 nextMain: nextDecomposed, 73 }, { 74 form: NFKC, 75 composing: true, 76 compatibility: true, 77 info: lookupInfoNFKC, 78 nextMain: nextComposed, 79 }, { 80 form: NFKD, 81 composing: false, 82 compatibility: true, 83 info: lookupInfoNFKC, 84 nextMain: nextDecomposed, 85 }} 86 87 // We do not distinguish between boundaries for NFC, NFD, etc. to avoid 88 // unexpected behavior for the user. For example, in NFD, there is a boundary 89 // after 'a'. However, 'a' might combine with modifiers, so from the application's 90 // perspective it is not a good boundary. We will therefore always use the 91 // boundaries for the combining variants. 
92 93 // BoundaryBefore returns true if this rune starts a new segment and 94 // cannot combine with any rune on the left. 95 func (p Properties) BoundaryBefore() bool { 96 if p.ccc == 0 && !p.combinesBackward() { 97 return true 98 } 99 // We assume that the CCC of the first character in a decomposition 100 // is always non-zero if different from info.ccc and that we can return 101 // false at this point. This is verified by maketables. 102 return false 103 } 104 105 // BoundaryAfter returns true if runes cannot combine with or otherwise 106 // interact with this or previous runes. 107 func (p Properties) BoundaryAfter() bool { 108 // TODO: loosen these conditions. 109 return p.isInert() 110 } 111 112 // We pack quick check data in 4 bits: 113 // 5: Combines forward (0 == false, 1 == true) 114 // 4..3: NFC_QC Yes(00), No (10), or Maybe (11) 115 // 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition. 116 // 1..0: Number of trailing non-starters. 117 // 118 // When all 4 bits are zero, the character is inert, meaning it is never 119 // influenced by normalization. 
120 type qcInfo uint8 121 122 func (p Properties) isYesC() bool { return p.flags&0x10 == 0 } 123 func (p Properties) isYesD() bool { return p.flags&0x4 == 0 } 124 125 func (p Properties) combinesForward() bool { return p.flags&0x20 != 0 } 126 func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe 127 func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD 128 129 func (p Properties) isInert() bool { 130 return p.flags&qcInfoMask == 0 && p.ccc == 0 131 } 132 133 func (p Properties) multiSegment() bool { 134 return p.index >= firstMulti && p.index < endMulti 135 } 136 137 func (p Properties) nLeadingNonStarters() uint8 { 138 return p.nLead 139 } 140 141 func (p Properties) nTrailingNonStarters() uint8 { 142 return uint8(p.flags & 0x03) 143 } 144 145 // Decomposition returns the decomposition for the underlying rune 146 // or nil if there is none. 147 func (p Properties) Decomposition() []byte { 148 // TODO: create the decomposition for Hangul? 149 if p.index == 0 { 150 return nil 151 } 152 i := p.index 153 n := decomps[i] & headerLenMask 154 i++ 155 return decomps[i : i+uint16(n)] 156 } 157 158 // Size returns the length of UTF-8 encoding of the rune. 159 func (p Properties) Size() int { 160 return int(p.size) 161 } 162 163 // CCC returns the canonical combining class of the underlying rune. 164 func (p Properties) CCC() uint8 { 165 if p.index >= firstCCCZeroExcept { 166 return 0 167 } 168 return ccc[p.ccc] 169 } 170 171 // LeadCCC returns the CCC of the first rune in the decomposition. 172 // If there is no decomposition, LeadCCC equals CCC. 173 func (p Properties) LeadCCC() uint8 { 174 return ccc[p.ccc] 175 } 176 177 // TrailCCC returns the CCC of the last rune in the decomposition. 178 // If there is no decomposition, TrailCCC equals CCC. 179 func (p Properties) TrailCCC() uint8 { 180 return ccc[p.tccc] 181 } 182 183 // Recomposition 184 // We use 32-bit keys instead of 64-bit for the two codepoint keys. 
185 // This clips off the bits of three entries, but we know this will not 186 // result in a collision. In the unlikely event that changes to 187 // UnicodeData.txt introduce collisions, the compiler will catch it. 188 // Note that the recomposition map for NFC and NFKC are identical. 189 190 // combine returns the combined rune or 0 if it doesn't exist. 191 func combine(a, b rune) rune { 192 key := uint32(uint16(a))<<16 + uint32(uint16(b)) 193 return recompMap[key] 194 } 195 196 func lookupInfoNFC(b input, i int) Properties { 197 v, sz := b.charinfoNFC(i) 198 return compInfo(v, sz) 199 } 200 201 func lookupInfoNFKC(b input, i int) Properties { 202 v, sz := b.charinfoNFKC(i) 203 return compInfo(v, sz) 204 } 205 206 // Properties returns properties for the first rune in s. 207 func (f Form) Properties(s []byte) Properties { 208 if f == NFC || f == NFD { 209 return compInfo(nfcData.lookup(s)) 210 } 211 return compInfo(nfkcData.lookup(s)) 212 } 213 214 // PropertiesString returns properties for the first rune in s. 215 func (f Form) PropertiesString(s string) Properties { 216 if f == NFC || f == NFD { 217 return compInfo(nfcData.lookupString(s)) 218 } 219 return compInfo(nfkcData.lookupString(s)) 220 } 221 222 // compInfo converts the information contained in v and sz 223 // to a Properties. See the comment at the top of the file 224 // for more information on the format. 
225 func compInfo(v uint16, sz int) Properties { 226 if v == 0 { 227 return Properties{size: uint8(sz)} 228 } else if v >= 0x8000 { 229 p := Properties{ 230 size: uint8(sz), 231 ccc: uint8(v), 232 tccc: uint8(v), 233 flags: qcInfo(v >> 8), 234 } 235 if p.ccc > 0 || p.combinesBackward() { 236 p.nLead = uint8(p.flags & 0x3) 237 } 238 return p 239 } 240 // has decomposition 241 h := decomps[v] 242 f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4 243 p := Properties{size: uint8(sz), flags: f, index: v} 244 if v >= firstCCC { 245 v += uint16(h&headerLenMask) + 1 246 c := decomps[v] 247 p.tccc = c >> 2 248 p.flags |= qcInfo(c & 0x3) 249 if v >= firstLeadingCCC { 250 p.nLead = c & 0x3 251 if v >= firstStarterWithNLead { 252 // We were tricked. Remove the decomposition. 253 p.flags &= 0x03 254 p.index = 0 255 return p 256 } 257 p.ccc = decomps[v+1] 258 } 259 } 260 return p 261 }