// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package format

import (
	"errors"
	"unicode/utf8"
)

// This file contains a parser for the CLDR number patterns as described in
// http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
//
// The following BNF is derived from this standard.
//
// pattern    := subpattern (';' subpattern)?
// subpattern := affix? number exponent? affix?
// number     := decimal | sigDigits
// decimal    := '#'* '0'* ('.' fraction)? | '#' | '0'
// fraction   := '0'* '#'*
// sigDigits  := '#'* '@' '@'* '#'*
// exponent   := 'E' '+'? '0'* '0'
// padSpec    := '*' \L
//
// Notes:
// - An affix pattern may contain any runes, but runes with special meaning
//   should be escaped.
// - Sequences of digits, '#', and '@' in decimal and sigDigits may have
//   interstitial commas.

// TODO: replace special characters in affixes (-, +, ¤) with control codes.
// NumberFormat holds information for formatting numbers. It is designed to
// hold information from CLDR number patterns.
//
// This pattern is precompiled for all patterns for all languages. Even though
// the number of patterns is not very large, we want to keep this small.
//
// This type is only intended for internal use.
type NumberFormat struct {
	// TODO: this struct can be packed a lot better than it is now. Should be
	// possible to make it 32 bytes.

	// Affix holds the prefix and suffix of the positive subpattern, optionally
	// followed by the prefix and suffix of the negative subpattern. Each affix
	// is stored as one length byte followed by that many bytes of UTF-8 text.
	// It is left empty when the pattern has no affixes at all.
	Affix string
	// Offset is documented as the offset into Affix for prefix and suffix.
	// NOTE(review): the parser in this file never assigns it.
	Offset uint16
	// NegOffset is the offset into Affix of the negative prefix and suffix,
	// or 0 if there are none.
	NegOffset uint16

	Multiplier     uint32 // 100 for '%', 1000 for '‰', otherwise 0.
	RoundIncrement uint32 // Use Min*Digits to determine scale
	PadRune        rune   // Rune following a '*' pad specifier, or 0.

	// FormatWidth counts the pattern runes consumed by the state machine.
	// The two runes of a pad specifier are not included.
	FormatWidth uint16

	// GroupingSize[0] is the size of the group closest to the decimal
	// separator and GroupingSize[1] the size of the group before that.
	GroupingSize [2]uint8
	Flags        NumberFormatFlag

	// Number of digits.
	MinIntegerDigits     uint8
	MaxIntegerDigits     uint8
	MinFractionDigits    uint8
	MaxFractionDigits    uint8
	MinSignificantDigits uint8
	MaxSignificantDigits uint8
	MinExponentDigits    uint8
}

// A NumberFormatFlag is a bit mask for the flag field of a NumberFormat.
type NumberFormatFlag uint8

// Flags of a NumberFormat. Of these, the parser in this file only sets
// AlwaysExpSign (for 'E+') and the padding position bits.
const (
	AlwaysSign NumberFormatFlag = 1 << iota
	AlwaysExpSign
	AlwaysDecimalSeparator
	ParenthesisForNegative // Common pattern. Saves space.

	PadAfterNumber
	PadAfterAffix

	// The position of the pad specifier is encoded in the two bits above.
	PadBeforePrefix = 0 // Default
	PadAfterPrefix  = PadAfterAffix
	PadBeforeSuffix = PadAfterNumber
	PadAfterSuffix  = PadAfterNumber | PadAfterAffix
	PadMask         = PadAfterNumber | PadAfterAffix
)

// parser holds the state for parsing a single subpattern. The results are
// written to the embedded NumberFormat and to buf.
type parser struct {
	*NumberFormat

	// leadingSharps counts the '#' runes seen at the start of the number.
	leadingSharps int

	pos int
	// err is the first error encountered; see setError.
	err error
	// doNotTerminate is set by a state function if the pattern may not end
	// directly after the rune it just consumed.
	doNotTerminate bool
	// inQuote is true while inside a quoted section of an affix.
	inQuote bool
	// groupingCount is the number of digits seen since the last ','.
	groupingCount uint
	// hasGroup is set by the first call to updateGrouping.
	hasGroup bool
	// buf accumulates the length-prefixed affixes.
	buf []byte
}

// setError records err unless an earlier error was already recorded, so that
// the first problem found in a pattern is the one that gets reported.
func (p *parser) setError(err error) {
	if p.err == nil {
		p.err = err
	}
}

// updateGrouping closes the current group of digits. It is called at each ','
// and at the end of the integer part. The first call only marks that
// a group separator was seen; later calls shift the previous group size to
// GroupingSize[1] and store the new one in GroupingSize[0].
//
// A call with no pending digits leaves the sizes untouched, which makes it
// safe to call again after a state function has already closed the group.
func (p *parser) updateGrouping() {
	if p.hasGroup && 0 < p.groupingCount && p.groupingCount < 255 {
		p.GroupingSize[1] = p.GroupingSize[0]
		p.GroupingSize[0] = uint8(p.groupingCount)
	}
	p.groupingCount = 0
	p.hasGroup = true
}

var (
	// TODO: more sensible and localizeable error messages.
	errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
	errInvalidPadSpecifier   = errors.New("format: invalid pad specifier")
	errInvalidQuote          = errors.New("format: invalid quote")
	errAffixTooLarge         = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
	errDuplicatePercentSign  = errors.New("format: duplicate percent sign")
	errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
	errUnexpectedEnd         = errors.New("format: unexpected end of pattern")
)

// ParseNumberPattern extracts formatting information from a CLDR number
// pattern.
//
// The pattern consists of a subpattern that is optionally followed by ';' and
// a negative subpattern. Only the affixes of the negative subpattern are
// retained. On a malformed pattern it returns nil and the first error that was
// encountered in either subpattern.
//
// See http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
func ParseNumberPattern(s string) (f *NumberFormat, err error) {
	p := parser{NumberFormat: &NumberFormat{}}

	s = p.parseSubPattern(s)

	if s != "" {
		// Parse negative sub pattern.
		if s[0] != ';' {
			p.setError(errors.New("format: error parsing first sub pattern"))
			return nil, p.err
		}
		neg := parser{NumberFormat: &NumberFormat{}} // just for extracting the affixes.
		s = neg.parseSubPattern(s[len(";"):])
		// The negative subpattern is parsed by a separate parser; its error
		// must be carried over or malformed input would go unnoticed.
		if neg.err != nil {
			p.setError(neg.err)
		}
		p.NegOffset = uint16(len(p.buf))
		p.buf = append(p.buf, neg.buf...)
	}
	if s != "" {
		p.setError(errors.New("format: spurious characters at end of pattern"))
	}
	if p.err != nil {
		return nil, p.err
	}
	if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
		// No prefix or suffixes.
		p.NegOffset = 0
	} else {
		p.Affix = affix
	}
	return p.NumberFormat, nil
}

// parseSubPattern parses one subpattern: an optional prefix, the number and
// an optional suffix, with a pad specifier allowed before or after each
// affix. It returns the unparsed remainder of s.
func (p *parser) parseSubPattern(s string) string {
	s = p.parsePad(s, PadBeforePrefix)
	s = p.parseAffix(s)
	s = p.parsePad(s, PadAfterPrefix)

	s = p.parse(p.number, s)
	// The state functions only close the last group when they see the rune
	// that follows it. Close it here as well for numbers that run up to the
	// end of the input, such as "#,##0".
	p.updateGrouping()

	s = p.parsePad(s, PadBeforeSuffix)
	s = p.parseAffix(s)
	s = p.parsePad(s, PadAfterSuffix)
	return s
}

// parsePad parses a pad specifier, '*' followed by the pad rune, at the start
// of s if there is one. It records the pad rune and the position flag f and
// returns the rest of s. A second pad specifier or a '*' that is not followed
// by a rune is an error.
func (p *parser) parsePad(s string, f NumberFormatFlag) (tail string) {
	if s == "" || s[0] != '*' {
		return s
	}
	if len(s) == 1 {
		// No other part of the parser accepts a '*', so report the missing
		// pad rune here instead of leaving it to a generic error later on.
		p.setError(errInvalidPadSpecifier)
		return ""
	}
	r, sz := utf8.DecodeRuneInString(s[1:])
	if p.PadRune != 0 {
		// Use setError so that an earlier error is not overwritten.
		p.setError(errMultiplePadSpecifiers)
	} else {
		p.Flags |= f
		p.PadRune = r
	}
	return s[1+sz:]
}

// parseAffix parses a prefix or suffix at the start of s and appends it to
// p.buf as a length byte followed by the text of the affix. It returns the
// rest of s.
func (p *parser) parseAffix(s string) string {
	x := len(p.buf)
	p.buf = append(p.buf, 0) // placeholder for affix length

	s = p.parse(p.affix, s)
	if p.inQuote {
		// The escape state never terminates by itself, so getting here while
		// still inside a quote means the closing quote is missing.
		p.setError(errInvalidQuote)
		p.inQuote = false
	}

	n := len(p.buf) - x - 1
	if n > 0xFF {
		p.setError(errAffixTooLarge)
	}
	p.buf[x] = uint8(n)
	return s
}

// state implements a state transition. It returns the new state. A state
// function may set an error on the parser or may simply return on an incorrect
// token and let the next phase fail.
type state func(r rune) state

// parse repeatedly applies a state function on the given string until a
// termination condition is reached: a state function returns nil, an error is
// set, or the input is exhausted. It returns the unparsed remainder, starting
// at the rune that caused the termination. Every rune that is consumed adds
// one to FormatWidth.
func (p *parser) parse(fn state, s string) (tail string) {
	for i, r := range s {
		p.doNotTerminate = false
		if fn = fn(r); fn == nil || p.err != nil {
			return s[i:]
		}
		p.FormatWidth++
	}
	if p.doNotTerminate {
		p.setError(errUnexpectedEnd)
	}
	return ""
}

// affix is the state for parsing a prefix or suffix. It terminates on any
// rune that starts a number, a pad specifier or the next subpattern.
func (p *parser) affix(r rune) state {
	switch r {
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
		'#', '@', '.', '*', ',', ';':
		return nil
	case '\'':
		p.inQuote = true
		return p.escape
	case '%':
		if p.Multiplier != 0 {
			p.setError(errDuplicatePercentSign)
		}
		p.Multiplier = 100
	case '\u2030': // ‰ Per mille
		if p.Multiplier != 0 {
			p.setError(errDuplicatePermilleSign)
		}
		p.Multiplier = 1000
		// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
	}
	p.buf = append(p.buf, string(r)...)
	return p.affix
}

// escape is the state for the quoted part of an affix. All runes up to the
// closing quote are copied to the affix without interpretation.
func (p *parser) escape(r rune) state {
	switch r {
	case '\'':
		p.inQuote = false
		return p.affix
	default:
		p.buf = append(p.buf, string(r)...)
	}
	return p.escape
}

// startExponent switches to the exponent state after an 'E'. It marks the
// pattern as incomplete, as the exponent needs at least one '0': without this
// a pattern that ends directly after the 'E' would be accepted.
func (p *parser) startExponent() state {
	p.doNotTerminate = true
	return p.exponent
}

// number parses a number. The BNF says the integer part should always have
// a '0', but that does not appear to be the case according to the rest of the
// documentation. We will allow having only '#' numbers.
func (p *parser) number(r rune) state {
	switch r {
	case '#':
		p.groupingCount++
		p.leadingSharps++
	case '@':
		// NOTE(review): sigDigits increments groupingCount for this rune as
		// well, so the first '@' is counted twice. Left as is; confirm intent.
		p.groupingCount++
		p.leadingSharps = 0
		return p.sigDigits(r)
	case ',':
		if p.leadingSharps == 0 { // no leading commas
			return nil
		}
		p.updateGrouping()
	case 'E':
		p.MaxIntegerDigits = uint8(p.leadingSharps)
		return p.startExponent()
	case '.': // allow ".##" etc.
		p.updateGrouping()
		return p.fraction
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return p.integer(r)
	default:
		return nil
	}
	return p.number
}

// integer is the state for the digits of the integer part. Each digit adds to
// MinIntegerDigits and is appended to RoundIncrement. Any other rune ends the
// integer part and closes the current group; 'E' and '.' continue with the
// exponent and the fraction, respectively.
func (p *parser) integer(r rune) state {
	if !('0' <= r && r <= '9') {
		var next state
		switch r {
		case 'E':
			if p.leadingSharps > 0 {
				p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
			}
			next = p.startExponent()
		case '.':
			next = p.fraction
		}
		p.updateGrouping()
		return next
	}
	p.RoundIncrement = p.RoundIncrement*10 + uint32(r-'0')
	p.groupingCount++
	p.MinIntegerDigits++
	return p.integer
}

// sigDigits is the state for the '@' runes of a significant digits pattern.
// Each '@' adds to both the minimum and the maximum number of significant
// digits.
func (p *parser) sigDigits(r rune) state {
	switch r {
	case '@':
		p.groupingCount++
		p.MaxSignificantDigits++
		p.MinSignificantDigits++
	case '#':
		return p.sigDigitsFinal(r)
	case 'E':
		p.updateGrouping()
		return p.normalizeSigDigitsWithExponent()
	default:
		p.updateGrouping()
		return nil
	}
	return p.sigDigits
}

// sigDigitsFinal is the state for the '#' runes that follow the '@' runes of
// a significant digits pattern. Each '#' adds to the maximum number of
// significant digits only.
func (p *parser) sigDigitsFinal(r rune) state {
	switch r {
	case '#':
		p.groupingCount++
		p.MaxSignificantDigits++
	case 'E':
		p.updateGrouping()
		return p.normalizeSigDigitsWithExponent()
	default:
		p.updateGrouping()
		return nil
	}
	return p.sigDigitsFinal
}

// normalizeSigDigitsWithExponent rewrites a significant digits pattern that
// has an exponent as one integer digit followed by fraction digits, clears
// the significant digit counts and continues with the exponent.
func (p *parser) normalizeSigDigitsWithExponent() state {
	p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
	p.MinFractionDigits = p.MinSignificantDigits - 1
	p.MaxFractionDigits = p.MaxSignificantDigits - 1
	p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
	return p.startExponent()
}

// fraction is the state for the part after the decimal separator. A digit
// adds to both the minimum and maximum number of fraction digits and is
// appended to RoundIncrement; a '#' only adds to the maximum.
func (p *parser) fraction(r rune) state {
	switch r {
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		p.RoundIncrement = p.RoundIncrement*10 + uint32(r-'0')
		p.MinFractionDigits++
		p.MaxFractionDigits++
	case '#':
		p.MaxFractionDigits++
	case 'E':
		if p.leadingSharps > 0 {
			p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
		}
		return p.startExponent()
	default:
		return nil
	}
	return p.fraction
}

// exponent is the state for the part after the 'E': an optional '+' followed
// by one or more '0' runes.
func (p *parser) exponent(r rune) state {
	switch r {
	case '+':
		// Set mode and check it wasn't already set.
		if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
			break
		}
		p.Flags |= AlwaysExpSign
		p.doNotTerminate = true
		return p.exponent
	case '0':
		p.MinExponentDigits++
		return p.exponent
	}
	// termination condition
	if p.MinExponentDigits == 0 {
		p.setError(errors.New("format: need at least one digit"))
	}
	return nil
}