github.com/mdaxf/iac@v0.0.0-20240519030858-58a061660378/vendor_skip/golang.org/x/text/internal/colltab/numeric.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package colltab 6 7 import ( 8 "unicode" 9 "unicode/utf8" 10 ) 11 12 // NewNumericWeighter wraps w to replace individual digits to sort based on their 13 // numeric value. 14 // 15 // Weighter w must have a free primary weight after the primary weight for 9. 16 // If this is not the case, numeric value will sort at the same primary level 17 // as the first primary sorting after 9. 18 func NewNumericWeighter(w Weighter) Weighter { 19 getElem := func(s string) Elem { 20 elems, _ := w.AppendNextString(nil, s) 21 return elems[0] 22 } 23 nine := getElem("9") 24 25 // Numbers should order before zero, but the DUCET has no room for this. 26 // TODO: move before zero once we use fractional collation elements. 27 ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0) 28 29 return &numericWeighter{ 30 Weighter: w, 31 32 // We assume that w sorts digits of different kinds in order of numeric 33 // value and that the tertiary weight order is preserved. 34 // 35 // TODO: evaluate whether it is worth basing the ranges on the Elem 36 // encoding itself once the move to fractional weights is complete. 37 zero: getElem("0"), 38 zeroSpecialLo: getElem("0"), // U+FF10 FULLWIDTH DIGIT ZERO 39 zeroSpecialHi: getElem("₀"), // U+2080 SUBSCRIPT ZERO 40 nine: nine, 41 nineSpecialHi: getElem("₉"), // U+2089 SUBSCRIPT NINE 42 numberStart: ns, 43 } 44 } 45 46 // A numericWeighter translates a stream of digits into a stream of weights 47 // representing the numeric value. 48 type numericWeighter struct { 49 Weighter 50 51 // The Elems below all demarcate boundaries of specific ranges. With the 52 // current element encoding digits are in two ranges: normal (default 53 // tertiary value) and special. For most languages, digits have collation 54 // elements in the normal range. 55 // 56 // Note: the range tests are very specific for the element encoding used by 57 // this implementation. The tests in collate_test.go are designed to fail 58 // if this code is not updated when an encoding has changed. 59 60 zero Elem // normal digit zero 61 zeroSpecialLo Elem // special digit zero, low tertiary value 62 zeroSpecialHi Elem // special digit zero, high tertiary value 63 nine Elem // normal digit nine 64 nineSpecialHi Elem // special digit nine 65 numberStart Elem 66 } 67 68 // AppendNext calls the namesake of the underlying weigher, but replaces single 69 // digits with weights representing their value. 70 func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) { 71 ce, n = nw.Weighter.AppendNext(buf, s) 72 nc := numberConverter{ 73 elems: buf, 74 w: nw, 75 b: s, 76 } 77 isZero, ok := nc.checkNextDigit(ce) 78 if !ok { 79 return ce, n 80 } 81 // ce might have been grown already, so take it instead of buf. 82 nc.init(ce, len(buf), isZero) 83 for n < len(s) { 84 ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:]) 85 nc.b = s 86 n += sz 87 if !nc.update(ce) { 88 break 89 } 90 } 91 return nc.result(), n 92 } 93 94 // AppendNextString calls the namesake of the underlying weigher, but replaces 95 // single digits with weights representing their value. 96 func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) { 97 ce, n = nw.Weighter.AppendNextString(buf, s) 98 nc := numberConverter{ 99 elems: buf, 100 w: nw, 101 s: s, 102 } 103 isZero, ok := nc.checkNextDigit(ce) 104 if !ok { 105 return ce, n 106 } 107 nc.init(ce, len(buf), isZero) 108 for n < len(s) { 109 ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:]) 110 nc.s = s 111 n += sz 112 if !nc.update(ce) { 113 break 114 } 115 } 116 return nc.result(), n 117 } 118 119 type numberConverter struct { 120 w *numericWeighter 121 122 elems []Elem 123 nDigits int 124 lenIndex int 125 126 s string // set if the input was of type string 127 b []byte // set if the input was of type []byte 128 } 129 130 // init completes initialization of a numberConverter and prepares it for adding 131 // more digits. elems is assumed to have a digit starting at oldLen. 132 func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) { 133 // Insert a marker indicating the start of a number and a placeholder 134 // for the number of digits. 135 if isZero { 136 elems = append(elems[:oldLen], nc.w.numberStart, 0) 137 } else { 138 elems = append(elems, 0, 0) 139 copy(elems[oldLen+2:], elems[oldLen:]) 140 elems[oldLen] = nc.w.numberStart 141 elems[oldLen+1] = 0 142 143 nc.nDigits = 1 144 } 145 nc.elems = elems 146 nc.lenIndex = oldLen + 1 147 } 148 149 // checkNextDigit reports whether bufNew adds a single digit relative to the old 150 // buffer. If it does, it also reports whether this digit is zero. 151 func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) { 152 if len(nc.elems) >= len(bufNew) { 153 return false, false 154 } 155 e := bufNew[len(nc.elems)] 156 if e < nc.w.zeroSpecialLo || nc.w.nine < e { 157 // Not a number. 158 return false, false 159 } 160 if e < nc.w.zero { 161 if e > nc.w.nineSpecialHi { 162 // Not a number. 163 return false, false 164 } 165 if !nc.isDigit() { 166 return false, false 167 } 168 isZero = e <= nc.w.zeroSpecialHi 169 } else { 170 // This is the common case if we encounter a digit. 171 isZero = e == nc.w.zero 172 } 173 // Test the remaining added collation elements have a zero primary value. 174 if n := len(bufNew) - len(nc.elems); n > 1 { 175 for i := len(nc.elems) + 1; i < len(bufNew); i++ { 176 if bufNew[i].Primary() != 0 { 177 return false, false 178 } 179 } 180 // In some rare cases, collation elements will encode runes in 181 // unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371) 182 // are not in Nd. Also some digits that clearly belong in unicode.No, 183 // like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have 184 // collation elements indistinguishable from normal digits. 185 // Unfortunately, this means we need to make this check for nearly all 186 // non-Latin digits. 187 // 188 // TODO: check the performance impact and find something better if it is 189 // an issue. 190 if !nc.isDigit() { 191 return false, false 192 } 193 } 194 return isZero, true 195 } 196 197 func (nc *numberConverter) isDigit() bool { 198 if nc.b != nil { 199 r, _ := utf8.DecodeRune(nc.b) 200 return unicode.In(r, unicode.Nd) 201 } 202 r, _ := utf8.DecodeRuneInString(nc.s) 203 return unicode.In(r, unicode.Nd) 204 } 205 206 // We currently support a maximum of about 2M digits (the number of primary 207 // values). Such numbers will compare correctly against small numbers, but their 208 // comparison against other large numbers is undefined. 209 // 210 // TODO: define a proper fallback, such as comparing large numbers textually or 211 // actually allowing numbers of unlimited length. 212 // 213 // TODO: cap this to a lower number (like 100) and maybe allow a larger number 214 // in an option? 215 const maxDigits = 1<<maxPrimaryBits - 1 216 217 func (nc *numberConverter) update(elems []Elem) bool { 218 isZero, ok := nc.checkNextDigit(elems) 219 if nc.nDigits == 0 && isZero { 220 return true 221 } 222 nc.elems = elems 223 if !ok { 224 return false 225 } 226 nc.nDigits++ 227 return nc.nDigits < maxDigits 228 } 229 230 // result fills in the length element for the digit sequence and returns the 231 // completed collation elements. 232 func (nc *numberConverter) result() []Elem { 233 e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0) 234 nc.elems[nc.lenIndex] = e 235 return nc.elems 236 }