github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/collate/collate.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // TODO: remove hard-coded versions when we have implemented fractional weights. 6 // The current implementation is incompatible with later CLDR versions. 7 //go:generate go run maketables.go -cldr=23 -unicode=6.2.0 8 9 // Package collate contains types for comparing and sorting Unicode strings 10 // according to a given collation order. Package locale provides a high-level 11 // interface to collation. Users should typically use that package instead. 12 package collate // import "golang.org/x/text/collate" 13 14 import ( 15 "bytes" 16 "strings" 17 18 "golang.org/x/text/collate/colltab" 19 newcolltab "golang.org/x/text/internal/colltab" 20 "golang.org/x/text/language" 21 ) 22 23 // Collator provides functionality for comparing strings for a given 24 // collation order. 25 type Collator struct { 26 options 27 28 sorter sorter 29 30 _iter [2]iter 31 } 32 33 func (c *Collator) iter(i int) *iter { 34 // TODO: evaluate performance for making the second iterator optional. 35 return &c._iter[i] 36 } 37 38 // Supported returns the list of languages for which collating differs from its parent. 39 func Supported() []language.Tag { 40 // TODO: use language.Coverage instead. 41 42 t := make([]language.Tag, len(tags)) 43 copy(t, tags) 44 return t 45 } 46 47 func init() { 48 ids := strings.Split(availableLocales, ",") 49 tags = make([]language.Tag, len(ids)) 50 for i, s := range ids { 51 tags[i] = language.Raw.MustParse(s) 52 } 53 } 54 55 var tags []language.Tag 56 57 // New returns a new Collator initialized for the given locale. 58 func New(t language.Tag, o ...Option) *Collator { 59 index := newcolltab.MatchLang(t, tags) 60 c := newCollator(colltab.Init(locales[index])) 61 62 // Set options from the user-supplied tag. 63 c.setFromTag(t) 64 65 // Set the user-supplied options. 66 c.setOptions(o) 67 68 c.init() 69 return c 70 } 71 72 // NewFromTable returns a new Collator for the given Weighter. 73 func NewFromTable(w colltab.Weighter, o ...Option) *Collator { 74 c := newCollator(w) 75 c.setOptions(o) 76 c.init() 77 return c 78 } 79 80 func (c *Collator) init() { 81 if c.numeric { 82 c.t = colltab.NewNumericWeighter(c.t) 83 } 84 c._iter[0].init(c) 85 c._iter[1].init(c) 86 } 87 88 // Buffer holds keys generated by Key and KeyString. 89 type Buffer struct { 90 buf [4096]byte 91 key []byte 92 } 93 94 func (b *Buffer) init() { 95 if b.key == nil { 96 b.key = b.buf[:0] 97 } 98 } 99 100 // Reset clears the buffer from previous results generated by Key and KeyString. 101 func (b *Buffer) Reset() { 102 b.key = b.key[:0] 103 } 104 105 // Compare returns an integer comparing the two byte slices. 106 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b. 107 func (c *Collator) Compare(a, b []byte) int { 108 // TODO: skip identical prefixes once we have a fast way to detect if a rune is 109 // part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest. 110 c.iter(0).SetInput(a) 111 c.iter(1).SetInput(b) 112 if res := c.compare(); res != 0 { 113 return res 114 } 115 if !c.ignore[colltab.Identity] { 116 return bytes.Compare(a, b) 117 } 118 return 0 119 } 120 121 // CompareString returns an integer comparing the two strings. 122 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b. 123 func (c *Collator) CompareString(a, b string) int { 124 // TODO: skip identical prefixes once we have a fast way to detect if a rune is 125 // part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest. 126 c.iter(0).SetInputString(a) 127 c.iter(1).SetInputString(b) 128 if res := c.compare(); res != 0 { 129 return res 130 } 131 if !c.ignore[colltab.Identity] { 132 if a < b { 133 return -1 134 } else if a > b { 135 return 1 136 } 137 } 138 return 0 139 } 140 141 func compareLevel(f func(i *iter) int, a, b *iter) int { 142 a.pce = 0 143 b.pce = 0 144 for { 145 va := f(a) 146 vb := f(b) 147 if va != vb { 148 if va < vb { 149 return -1 150 } 151 return 1 152 } else if va == 0 { 153 break 154 } 155 } 156 return 0 157 } 158 159 func (c *Collator) compare() int { 160 ia, ib := c.iter(0), c.iter(1) 161 // Process primary level 162 if c.alternate != altShifted { 163 // TODO: implement script reordering 164 if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 { 165 return res 166 } 167 } else { 168 // TODO: handle shifted 169 } 170 if !c.ignore[colltab.Secondary] { 171 f := (*iter).nextSecondary 172 if c.backwards { 173 f = (*iter).prevSecondary 174 } 175 if res := compareLevel(f, ia, ib); res != 0 { 176 return res 177 } 178 } 179 // TODO: special case handling (Danish?) 180 if !c.ignore[colltab.Tertiary] || c.caseLevel { 181 if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 { 182 return res 183 } 184 if !c.ignore[colltab.Quaternary] { 185 if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 { 186 return res 187 } 188 } 189 } 190 return 0 191 } 192 193 // Key returns the collation key for str. 194 // Passing the buffer buf may avoid memory allocations. 195 // The returned slice will point to an allocation in Buffer and will remain 196 // valid until the next call to buf.Reset(). 197 func (c *Collator) Key(buf *Buffer, str []byte) []byte { 198 // See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details. 199 buf.init() 200 return c.key(buf, c.getColElems(str)) 201 } 202 203 // KeyFromString returns the collation key for str. 204 // Passing the buffer buf may avoid memory allocations. 205 // The returned slice will point to an allocation in Buffer and will retain 206 // valid until the next call to buf.ResetKeys(). 207 func (c *Collator) KeyFromString(buf *Buffer, str string) []byte { 208 // See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details. 209 buf.init() 210 return c.key(buf, c.getColElemsString(str)) 211 } 212 213 func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte { 214 processWeights(c.alternate, c.t.Top(), w) 215 kn := len(buf.key) 216 c.keyFromElems(buf, w) 217 return buf.key[kn:] 218 } 219 220 func (c *Collator) getColElems(str []byte) []colltab.Elem { 221 i := c.iter(0) 222 i.SetInput(str) 223 for i.Next() { 224 } 225 return i.Elems 226 } 227 228 func (c *Collator) getColElemsString(str string) []colltab.Elem { 229 i := c.iter(0) 230 i.SetInputString(str) 231 for i.Next() { 232 } 233 return i.Elems 234 } 235 236 type iter struct { 237 wa [512]colltab.Elem 238 239 newcolltab.Iter 240 pce int 241 } 242 243 func (i *iter) init(c *Collator) { 244 i.Weighter = c.t 245 i.Elems = i.wa[:0] 246 } 247 248 func (i *iter) nextPrimary() int { 249 for { 250 for ; i.pce < i.N; i.pce++ { 251 if v := i.Elems[i.pce].Primary(); v != 0 { 252 i.pce++ 253 return v 254 } 255 } 256 if !i.Next() { 257 return 0 258 } 259 } 260 panic("should not reach here") 261 } 262 263 func (i *iter) nextSecondary() int { 264 for ; i.pce < len(i.Elems); i.pce++ { 265 if v := i.Elems[i.pce].Secondary(); v != 0 { 266 i.pce++ 267 return v 268 } 269 } 270 return 0 271 } 272 273 func (i *iter) prevSecondary() int { 274 for ; i.pce < len(i.Elems); i.pce++ { 275 if v := i.Elems[len(i.Elems)-i.pce-1].Secondary(); v != 0 { 276 i.pce++ 277 return v 278 } 279 } 280 return 0 281 } 282 283 func (i *iter) nextTertiary() int { 284 for ; i.pce < len(i.Elems); i.pce++ { 285 if v := i.Elems[i.pce].Tertiary(); v != 0 { 286 i.pce++ 287 return int(v) 288 } 289 } 290 return 0 291 } 292 293 func (i *iter) nextQuaternary() int { 294 for ; i.pce < len(i.Elems); i.pce++ { 295 if v := i.Elems[i.pce].Quaternary(); v != 0 { 296 i.pce++ 297 return v 298 } 299 } 300 return 0 301 } 302 303 func appendPrimary(key []byte, p int) []byte { 304 // Convert to variable length encoding; supports up to 23 bits. 305 if p <= 0x7FFF { 306 key = append(key, uint8(p>>8), uint8(p)) 307 } else { 308 key = append(key, uint8(p>>16)|0x80, uint8(p>>8), uint8(p)) 309 } 310 return key 311 } 312 313 // keyFromElems converts the weights ws to a compact sequence of bytes. 314 // The result will be appended to the byte buffer in buf. 315 func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) { 316 for _, v := range ws { 317 if w := v.Primary(); w > 0 { 318 buf.key = appendPrimary(buf.key, w) 319 } 320 } 321 if !c.ignore[colltab.Secondary] { 322 buf.key = append(buf.key, 0, 0) 323 // TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF. 324 if !c.backwards { 325 for _, v := range ws { 326 if w := v.Secondary(); w > 0 { 327 buf.key = append(buf.key, uint8(w>>8), uint8(w)) 328 } 329 } 330 } else { 331 for i := len(ws) - 1; i >= 0; i-- { 332 if w := ws[i].Secondary(); w > 0 { 333 buf.key = append(buf.key, uint8(w>>8), uint8(w)) 334 } 335 } 336 } 337 } else if c.caseLevel { 338 buf.key = append(buf.key, 0, 0) 339 } 340 if !c.ignore[colltab.Tertiary] || c.caseLevel { 341 buf.key = append(buf.key, 0, 0) 342 for _, v := range ws { 343 if w := v.Tertiary(); w > 0 { 344 buf.key = append(buf.key, uint8(w)) 345 } 346 } 347 // Derive the quaternary weights from the options and other levels. 348 // Note that we represent MaxQuaternary as 0xFF. The first byte of the 349 // representation of a primary weight is always smaller than 0xFF, 350 // so using this single byte value will compare correctly. 351 if !c.ignore[colltab.Quaternary] && c.alternate >= altShifted { 352 if c.alternate == altShiftTrimmed { 353 lastNonFFFF := len(buf.key) 354 buf.key = append(buf.key, 0) 355 for _, v := range ws { 356 if w := v.Quaternary(); w == colltab.MaxQuaternary { 357 buf.key = append(buf.key, 0xFF) 358 } else if w > 0 { 359 buf.key = appendPrimary(buf.key, w) 360 lastNonFFFF = len(buf.key) 361 } 362 } 363 buf.key = buf.key[:lastNonFFFF] 364 } else { 365 buf.key = append(buf.key, 0) 366 for _, v := range ws { 367 if w := v.Quaternary(); w == colltab.MaxQuaternary { 368 buf.key = append(buf.key, 0xFF) 369 } else if w > 0 { 370 buf.key = appendPrimary(buf.key, w) 371 } 372 } 373 } 374 } 375 } 376 } 377 378 func processWeights(vw alternateHandling, top uint32, wa []colltab.Elem) { 379 ignore := false 380 vtop := int(top) 381 switch vw { 382 case altShifted, altShiftTrimmed: 383 for i := range wa { 384 if p := wa[i].Primary(); p <= vtop && p != 0 { 385 wa[i] = colltab.MakeQuaternary(p) 386 ignore = true 387 } else if p == 0 { 388 if ignore { 389 wa[i] = colltab.Ignore 390 } 391 } else { 392 ignore = false 393 } 394 } 395 case altBlanked: 396 for i := range wa { 397 if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) { 398 wa[i] = colltab.Ignore 399 ignore = true 400 } else { 401 ignore = false 402 } 403 } 404 } 405 }