github.com/tawesoft/golib/v2@v2.10.0/legacy/localize/decimalformat.go (about) 1 package localize 2 3 import ( 4 "math" 5 "strconv" 6 "unicode" 7 "unicode/utf8" 8 9 "golang.org/x/text/language" 10 "golang.org/x/text/message" 11 "golang.org/x/text/number" 12 ) 13 14 // acceptRune returns the length of r in bytes if r is the first rune in s, 15 // otherwise returns zero. 16 func acceptRune(r rune, s string) int { 17 if f, ok := firstRune(s); ok && (f == r) { 18 return utf8.RuneLen(r) 19 } else { 20 return 0 21 } 22 } 23 24 // firstRune returns the first rune in a string and true, or (_, false). 25 func firstRune(s string) (rune, bool) { 26 for _, c := range s { 27 return c, true 28 } 29 return runeNone, false 30 } 31 32 // guessDecimalGroupSeparator guesses, for a printer in a given locale, 33 // the group separator rune in a decimal number system e.g. comma for British. 34 func guessDecimalGroupSeparator(p *message.Printer) rune { 35 // heuristic: any rune that appears at least twice is probably a comma 36 s := p.Sprint(number.Decimal(1234567890)) 37 return repeatingRune(s) 38 } 39 40 // guessDecimalPointSeparator guesses, for a printer in a given locale, 41 // the decimal point rune in a decimal number system, e.g. period for British. 42 func guessDecimalPoint(p *message.Printer) rune { 43 // heuristic: any rune that is common to both these strings is probably a 44 // decimal point. Concat the strings and find any repeated rune. 45 s1 := p.Sprint(number.Decimal(1.23)) 46 s2 := p.Sprint(number.Decimal(4.56)) 47 s := s1 + s2 48 return repeatingRune(s) 49 } 50 51 // guessDecimalDigits guesses, for a printer in a given locale, the digits 52 // representing the values 0 to 9. 53 func guessDecimalDigits(p *message.Printer, out *[10]rune) { 54 for i := 0; i < 10; i++ { 55 s := []rune(p.Sprint(number.Decimal(i))) 56 if len(s) == 1 { 57 out[i] = s[0] 58 } else { 59 out[i] = runeNone 60 } 61 } 62 } 63 64 // decimalFormat defines how a decimal (base-10) number should be parsed for a 65 // given locale. Note that the behaviour is undefined for locales that have 66 // non-base-10 number systems. 67 // 68 // This structure is currently internal until we have more confidence it is 69 // correct for all languages with decimal number systems. 70 type decimalFormat struct { 71 // GroupSeparator is a digits separator such as commas for thousands. In 72 // addition to any separator defined here, a parser will ignore whitespace. 73 GroupSeparator rune 74 75 // Point is separator between the integer and fractional part of 76 // a decimal number. 77 Point rune 78 79 // Digits are an ascending list of digit runes 80 Digits [10]rune 81 } 82 83 func (f decimalFormat) ParseInt(s string) (int64, error) { 84 if len(s) == 0 { return 0, strconv.ErrSyntax } 85 86 value, length, err := f.AcceptInt(s) 87 88 if err != nil { return 0, err } 89 if len(s) != length { return 0, strconv.ErrSyntax } 90 91 return value, nil 92 } 93 94 func (f decimalFormat) ParseFloat(s string) (float64, error) { 95 if len(s) == 0 { return 0, strconv.ErrSyntax } 96 97 value, length, err := f.AcceptFloat(s) 98 99 if err != nil { return 0, err } 100 if len(s) != length { return 0, strconv.ErrSyntax } 101 102 return value, nil 103 } 104 105 // NewDecimalFormat constructs, for a given locale, a NumberFormat that 106 // defines how a decimal (base-10) number should be parsed. Note that the 107 // behaviour is undefined for locales that have non-base-10 number systems. 108 func NewDecimalFormat(tag language.Tag) NumberFormat { 109 110 // Unfortunately, I couldn't find any exported symbols in /x/text that 111 // gives this information directly (as would be ideal). Therefore this 112 // function works by printing numbers in the current locale and using 113 // heuristics to guess the correct separators. 114 115 p := message.NewPrinter(tag) 116 117 format := decimalFormat{ 118 GroupSeparator: guessDecimalGroupSeparator(p), 119 Point: guessDecimalPoint(p), 120 } 121 122 guessDecimalDigits(p, &format.Digits) 123 124 return format 125 } 126 127 // returns (0-9, true) for a decimal digit in any language, or (_, false) 128 func decimalRuneToInt(d rune, digits *[10]rune) (int, bool) { 129 for i := 0; i < 10; i++ { 130 if d == digits[i] { return i, true } 131 } 132 return 0, false 133 } 134 135 func (f decimalFormat) leadingZeros(s string) int { 136 zeros := 0 137 for _, c := range s { 138 zero, ok := decimalRuneToInt(c, &f.Digits) 139 if !ok { continue } 140 if zero == 0 { zeros++; continue } 141 return zeros 142 } 143 return 0 144 } 145 146 // AcceptInteger parses as much of an integer number as possible. It returns a 147 // 2 tuple: the value of the parsed integer, and the length of the characters 148 // successfully parsed. For example, for some locales, the string "1,000X" 149 // returns (1000, 5) and the string "foo" returns (0, 0). 150 // 151 // Err is always nil, strconv.ErrRange or strconv.ErrSyntax 152 func (f decimalFormat) AcceptInt(s string) (value int64, length int, err error) { 153 154 if len(s) == 0 { return 0, 0, nil } 155 156 if s[0] == '-' { 157 // TODO better negative check e.g. "(1)" for "-1" 158 v, l, _ := f.AcceptUint(s[1:]) 159 // TODO bounds check 160 if l > 0 { 161 return int64(v) * -1, l + 1, nil 162 } else { 163 return 0, 0, nil 164 } 165 } 166 167 // TODO bounds check 168 v, l, err := f.AcceptUint(s) 169 return int64(v), l, nil 170 } 171 172 // AcceptUint: see AcceptInt 173 func (f decimalFormat) AcceptUint(s string) (value uint64, length int, err error) { 174 var accu uint64 175 176 for i, c := range s { 177 if c == f.GroupSeparator { 178 // pass 179 } else if unicode.IsSpace(c) { 180 // pass 181 } else if d, ok := decimalRuneToInt(c, &f.Digits); ok { 182 accu *= 10 183 accu += uint64(d) 184 // TODO bounds check 185 } else { 186 // TODO this count is runes but should be bytes! 187 return accu, i, nil 188 } 189 } 190 191 return accu, len(s), nil 192 } 193 194 // AcceptFloat parses as much of a floating point number as possible. It returns 195 // a 2 tuple: the value of the parsed float, and the length of the characters 196 // successfully parsed. For example, for some locales, the string "1.23X" 197 // returns (1.23, 4) and the string "foo" returns (0.0, 0). 198 // 199 // Err is always nil, strconv.ErrRange or strconv.ErrSyntax 200 func (f decimalFormat) AcceptFloat(s string) (value float64, length int, err error) { 201 var left, right int64 202 var leftLen, rightLen, pointLen int 203 var fLeft, fRight float64 204 205 // accept leading decimal point 206 if first, ok := firstRune(s); ok && first != f.Point { 207 left, leftLen, err = f.AcceptInt(s) 208 // TODO check err (Currently always nil) 209 if leftLen == 0 { return 0, 0, nil } 210 fLeft = float64(left) 211 } 212 213 pointLen = acceptRune(f.Point, s[leftLen:]) 214 leadingZeros := 0 215 216 if pointLen > 0 && (s[leftLen +pointLen] != '-') { 217 218 leadingZeros = f.leadingZeros(s[leftLen +pointLen:]) 219 220 right, rightLen, err = f.AcceptInt(s[leftLen +pointLen:]) 221 // TODO check err (currently always nil) 222 } 223 224 if right > 0.0 { 225 fRight = float64(right) 226 places := 1.0 + math.Floor(math.Log10(fRight)) + float64(leadingZeros) 227 fRight *= math.Pow(0.1, places) 228 fRight = math.Copysign(fRight, fLeft) 229 } 230 231 value = fLeft + fRight 232 length = leftLen + pointLen + rightLen 233 234 return value, length, nil 235 }