github.com/mdaxf/iac@v0.0.0-20240519030858-58a061660378/vendor_skip/golang.org/x/text/collate/collate.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // TODO: remove hard-coded versions when we have implemented fractional weights. 6 // The current implementation is incompatible with later CLDR versions. 7 //go:generate go run maketables.go -cldr=23 -unicode=6.2.0 8 9 // Package collate contains types for comparing and sorting Unicode strings 10 // according to a given collation order. 11 package collate // import "golang.org/x/text/collate" 12 13 import ( 14 "bytes" 15 "strings" 16 17 "golang.org/x/text/internal/colltab" 18 "golang.org/x/text/language" 19 ) 20 21 // Collator provides functionality for comparing strings for a given 22 // collation order. 23 type Collator struct { 24 options 25 26 sorter sorter 27 28 _iter [2]iter 29 } 30 31 func (c *Collator) iter(i int) *iter { 32 // TODO: evaluate performance for making the second iterator optional. 33 return &c._iter[i] 34 } 35 36 // Supported returns the list of languages for which collating differs from its parent. 37 func Supported() []language.Tag { 38 // TODO: use language.Coverage instead. 39 40 t := make([]language.Tag, len(tags)) 41 copy(t, tags) 42 return t 43 } 44 45 func init() { 46 ids := strings.Split(availableLocales, ",") 47 tags = make([]language.Tag, len(ids)) 48 for i, s := range ids { 49 tags[i] = language.Raw.MustParse(s) 50 } 51 } 52 53 var tags []language.Tag 54 55 // New returns a new Collator initialized for the given locale. 56 func New(t language.Tag, o ...Option) *Collator { 57 index := colltab.MatchLang(t, tags) 58 c := newCollator(getTable(locales[index])) 59 60 // Set options from the user-supplied tag. 61 c.setFromTag(t) 62 63 // Set the user-supplied options. 64 c.setOptions(o) 65 66 c.init() 67 return c 68 } 69 70 // NewFromTable returns a new Collator for the given Weighter. 71 func NewFromTable(w colltab.Weighter, o ...Option) *Collator { 72 c := newCollator(w) 73 c.setOptions(o) 74 c.init() 75 return c 76 } 77 78 func (c *Collator) init() { 79 if c.numeric { 80 c.t = colltab.NewNumericWeighter(c.t) 81 } 82 c._iter[0].init(c) 83 c._iter[1].init(c) 84 } 85 86 // Buffer holds keys generated by Key and KeyString. 87 type Buffer struct { 88 buf [4096]byte 89 key []byte 90 } 91 92 func (b *Buffer) init() { 93 if b.key == nil { 94 b.key = b.buf[:0] 95 } 96 } 97 98 // Reset clears the buffer from previous results generated by Key and KeyString. 99 func (b *Buffer) Reset() { 100 b.key = b.key[:0] 101 } 102 103 // Compare returns an integer comparing the two byte slices. 104 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b. 105 func (c *Collator) Compare(a, b []byte) int { 106 // TODO: skip identical prefixes once we have a fast way to detect if a rune is 107 // part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest. 108 c.iter(0).SetInput(a) 109 c.iter(1).SetInput(b) 110 if res := c.compare(); res != 0 { 111 return res 112 } 113 if !c.ignore[colltab.Identity] { 114 return bytes.Compare(a, b) 115 } 116 return 0 117 } 118 119 // CompareString returns an integer comparing the two strings. 120 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b. 121 func (c *Collator) CompareString(a, b string) int { 122 // TODO: skip identical prefixes once we have a fast way to detect if a rune is 123 // part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest. 124 c.iter(0).SetInputString(a) 125 c.iter(1).SetInputString(b) 126 if res := c.compare(); res != 0 { 127 return res 128 } 129 if !c.ignore[colltab.Identity] { 130 if a < b { 131 return -1 132 } else if a > b { 133 return 1 134 } 135 } 136 return 0 137 } 138 139 func compareLevel(f func(i *iter) int, a, b *iter) int { 140 a.pce = 0 141 b.pce = 0 142 for { 143 va := f(a) 144 vb := f(b) 145 if va != vb { 146 if va < vb { 147 return -1 148 } 149 return 1 150 } else if va == 0 { 151 break 152 } 153 } 154 return 0 155 } 156 157 func (c *Collator) compare() int { 158 ia, ib := c.iter(0), c.iter(1) 159 // Process primary level 160 if c.alternate != altShifted { 161 // TODO: implement script reordering 162 if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 { 163 return res 164 } 165 } else { 166 // TODO: handle shifted 167 } 168 if !c.ignore[colltab.Secondary] { 169 f := (*iter).nextSecondary 170 if c.backwards { 171 f = (*iter).prevSecondary 172 } 173 if res := compareLevel(f, ia, ib); res != 0 { 174 return res 175 } 176 } 177 // TODO: special case handling (Danish?) 178 if !c.ignore[colltab.Tertiary] || c.caseLevel { 179 if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 { 180 return res 181 } 182 if !c.ignore[colltab.Quaternary] { 183 if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 { 184 return res 185 } 186 } 187 } 188 return 0 189 } 190 191 // Key returns the collation key for str. 192 // Passing the buffer buf may avoid memory allocations. 193 // The returned slice will point to an allocation in Buffer and will remain 194 // valid until the next call to buf.Reset(). 195 func (c *Collator) Key(buf *Buffer, str []byte) []byte { 196 // See https://www.unicode.org/reports/tr10/#Main_Algorithm for more details. 197 buf.init() 198 return c.key(buf, c.getColElems(str)) 199 } 200 201 // KeyFromString returns the collation key for str. 202 // Passing the buffer buf may avoid memory allocations. 203 // The returned slice will point to an allocation in Buffer and will retain 204 // valid until the next call to buf.ResetKeys(). 205 func (c *Collator) KeyFromString(buf *Buffer, str string) []byte { 206 // See https://www.unicode.org/reports/tr10/#Main_Algorithm for more details. 207 buf.init() 208 return c.key(buf, c.getColElemsString(str)) 209 } 210 211 func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte { 212 processWeights(c.alternate, c.t.Top(), w) 213 kn := len(buf.key) 214 c.keyFromElems(buf, w) 215 return buf.key[kn:] 216 } 217 218 func (c *Collator) getColElems(str []byte) []colltab.Elem { 219 i := c.iter(0) 220 i.SetInput(str) 221 for i.Next() { 222 } 223 return i.Elems 224 } 225 226 func (c *Collator) getColElemsString(str string) []colltab.Elem { 227 i := c.iter(0) 228 i.SetInputString(str) 229 for i.Next() { 230 } 231 return i.Elems 232 } 233 234 type iter struct { 235 wa [512]colltab.Elem 236 237 colltab.Iter 238 pce int 239 } 240 241 func (i *iter) init(c *Collator) { 242 i.Weighter = c.t 243 i.Elems = i.wa[:0] 244 } 245 246 func (i *iter) nextPrimary() int { 247 for { 248 for ; i.pce < i.N; i.pce++ { 249 if v := i.Elems[i.pce].Primary(); v != 0 { 250 i.pce++ 251 return v 252 } 253 } 254 if !i.Next() { 255 return 0 256 } 257 } 258 panic("should not reach here") 259 } 260 261 func (i *iter) nextSecondary() int { 262 for ; i.pce < len(i.Elems); i.pce++ { 263 if v := i.Elems[i.pce].Secondary(); v != 0 { 264 i.pce++ 265 return v 266 } 267 } 268 return 0 269 } 270 271 func (i *iter) prevSecondary() int { 272 for ; i.pce < len(i.Elems); i.pce++ { 273 if v := i.Elems[len(i.Elems)-i.pce-1].Secondary(); v != 0 { 274 i.pce++ 275 return v 276 } 277 } 278 return 0 279 } 280 281 func (i *iter) nextTertiary() int { 282 for ; i.pce < len(i.Elems); i.pce++ { 283 if v := i.Elems[i.pce].Tertiary(); v != 0 { 284 i.pce++ 285 return int(v) 286 } 287 } 288 return 0 289 } 290 291 func (i *iter) nextQuaternary() int { 292 for ; i.pce < len(i.Elems); i.pce++ { 293 if v := i.Elems[i.pce].Quaternary(); v != 0 { 294 i.pce++ 295 return v 296 } 297 } 298 return 0 299 } 300 301 func appendPrimary(key []byte, p int) []byte { 302 // Convert to variable length encoding; supports up to 23 bits. 303 if p <= 0x7FFF { 304 key = append(key, uint8(p>>8), uint8(p)) 305 } else { 306 key = append(key, uint8(p>>16)|0x80, uint8(p>>8), uint8(p)) 307 } 308 return key 309 } 310 311 // keyFromElems converts the weights ws to a compact sequence of bytes. 312 // The result will be appended to the byte buffer in buf. 313 func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) { 314 for _, v := range ws { 315 if w := v.Primary(); w > 0 { 316 buf.key = appendPrimary(buf.key, w) 317 } 318 } 319 if !c.ignore[colltab.Secondary] { 320 buf.key = append(buf.key, 0, 0) 321 // TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF. 322 if !c.backwards { 323 for _, v := range ws { 324 if w := v.Secondary(); w > 0 { 325 buf.key = append(buf.key, uint8(w>>8), uint8(w)) 326 } 327 } 328 } else { 329 for i := len(ws) - 1; i >= 0; i-- { 330 if w := ws[i].Secondary(); w > 0 { 331 buf.key = append(buf.key, uint8(w>>8), uint8(w)) 332 } 333 } 334 } 335 } else if c.caseLevel { 336 buf.key = append(buf.key, 0, 0) 337 } 338 if !c.ignore[colltab.Tertiary] || c.caseLevel { 339 buf.key = append(buf.key, 0, 0) 340 for _, v := range ws { 341 if w := v.Tertiary(); w > 0 { 342 buf.key = append(buf.key, uint8(w)) 343 } 344 } 345 // Derive the quaternary weights from the options and other levels. 346 // Note that we represent MaxQuaternary as 0xFF. The first byte of the 347 // representation of a primary weight is always smaller than 0xFF, 348 // so using this single byte value will compare correctly. 349 if !c.ignore[colltab.Quaternary] && c.alternate >= altShifted { 350 if c.alternate == altShiftTrimmed { 351 lastNonFFFF := len(buf.key) 352 buf.key = append(buf.key, 0) 353 for _, v := range ws { 354 if w := v.Quaternary(); w == colltab.MaxQuaternary { 355 buf.key = append(buf.key, 0xFF) 356 } else if w > 0 { 357 buf.key = appendPrimary(buf.key, w) 358 lastNonFFFF = len(buf.key) 359 } 360 } 361 buf.key = buf.key[:lastNonFFFF] 362 } else { 363 buf.key = append(buf.key, 0) 364 for _, v := range ws { 365 if w := v.Quaternary(); w == colltab.MaxQuaternary { 366 buf.key = append(buf.key, 0xFF) 367 } else if w > 0 { 368 buf.key = appendPrimary(buf.key, w) 369 } 370 } 371 } 372 } 373 } 374 } 375 376 func processWeights(vw alternateHandling, top uint32, wa []colltab.Elem) { 377 ignore := false 378 vtop := int(top) 379 switch vw { 380 case altShifted, altShiftTrimmed: 381 for i := range wa { 382 if p := wa[i].Primary(); p <= vtop && p != 0 { 383 wa[i] = colltab.MakeQuaternary(p) 384 ignore = true 385 } else if p == 0 { 386 if ignore { 387 wa[i] = colltab.Ignore 388 } 389 } else { 390 ignore = false 391 } 392 } 393 case altBlanked: 394 for i := range wa { 395 if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) { 396 wa[i] = colltab.Ignore 397 ignore = true 398 } else { 399 ignore = false 400 } 401 } 402 } 403 }