github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/internal/format/pattern.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package format
     6  
     7  import (
     8  	"errors"
     9  	"unicode/utf8"
    10  )
    11  
    12  // This file contains a parser for the CLDR number patterns as described in
    13  // http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
    14  //
    15  // The following BNF is derived from this standard.
    16  //
    17  // pattern    := subpattern (';' subpattern)?
    18  // subpattern := affix? number exponent? affix?
    19  // number     := decimal | sigDigits
    20  // decimal    := '#'* '0'* ('.' fraction)? | '#' | '0'
    21  // fraction   := '0'* '#'*
    22  // sigDigits  := '#'* '@' '@'* '#'*
    23  // exponent   := 'E' '+'? '0'* '0'
    24  // padSpec    := '*' \L
    25  //
    26  // Notes:
    27  // - An affix pattern may contain any runes, but runes with special meaning
    28  //   should be escaped.
    29  // - Sequences of digits, '#', and '@' in decimal and sigDigits may have
    30  //   interstitial commas.
    31  
    32  // TODO: replace special characters in affixes (-, +, ¤) with control codes.
    33  
// NumberFormat holds information for formatting numbers. It is designed to
// hold information from CLDR number patterns.
//
// Values of this type are precompiled for all patterns for all languages.
// Even though the number of patterns is not very large, we want to keep this
// small.
//
// This type is only intended for internal use.
type NumberFormat struct {
	// TODO: this struct can be packed a lot better than it is now. Should be
	// possible to make it 32 bytes.

	// Affix is built by ParseNumberPattern from the parsed affixes. Each affix
	// is stored as one length byte followed by that many bytes of text, in the
	// order prefix, suffix and, when a negative subpattern is present,
	// negative prefix, negative suffix. It is left empty when every affix is
	// empty.
	Affix     string // includes prefix and suffix. First byte is prefix length.
	// NOTE(review): Offset is not assigned by the parser in this file;
	// presumably it is filled in when patterns are packed into a shared
	// table — confirm against the table generator.
	Offset    uint16 // Offset into Affix for prefix and suffix
	// NegOffset is the length of the positive affix data that precedes the
	// negative affixes in Affix.
	NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.

	// Multiplier is set to 100 when an affix contains '%' and to 1000 when it
	// contains '‰'; it is 0 otherwise.
	Multiplier     uint32
	// RoundIncrement accumulates, as a decimal number, the digits that appear
	// in the integer and fraction parts of the pattern.
	RoundIncrement uint32 // Use Min*Digits to determine scale
	// PadRune is the rune that follows '*' in a pad specifier, or 0 if the
	// pattern has none.
	PadRune        rune

	// FormatWidth is incremented once for every rune consumed by the affix
	// and number state machines.
	FormatWidth uint16

	// GroupingSize[0] is the size of the most recently completed digit group
	// and GroupingSize[1] the size of the group completed before it.
	GroupingSize [2]uint8
	Flags        NumberFormatFlag

	// Number of digits.
	MinIntegerDigits     uint8
	MaxIntegerDigits     uint8
	MinFractionDigits    uint8
	MaxFractionDigits    uint8
	MinSignificantDigits uint8
	MaxSignificantDigits uint8
	MinExponentDigits    uint8
}
    67  
    68  // A NumberFormatFlag is a bit mask for the flag field of a NumberFormat.
    69  type NumberFormatFlag uint8
    70  
    71  const (
    72  	AlwaysSign NumberFormatFlag = 1 << iota
    73  	AlwaysExpSign
    74  	AlwaysDecimalSeparator
    75  	ParenthesisForNegative // Common pattern. Saves space.
    76  
    77  	PadAfterNumber
    78  	PadAfterAffix
    79  
    80  	PadBeforePrefix = 0 // Default
    81  	PadAfterPrefix  = PadAfterAffix
    82  	PadBeforeSuffix = PadAfterNumber
    83  	PadAfterSuffix  = PadAfterNumber | PadAfterAffix
    84  	PadMask         = PadAfterNumber | PadAfterAffix
    85  )
    86  
// parser holds the state used while parsing one (sub)pattern. The results are
// written to the embedded NumberFormat.
type parser struct {
	*NumberFormat

	// leadingSharps counts the '#' runes seen at the start of the number; it
	// is reset to 0 when an '@' is encountered.
	leadingSharps int

	// pos is not referenced by the parsing code in this part of the file.
	pos            int
	// err is the error recorded during parsing, if any.
	err            error
	// doNotTerminate is cleared before every rune and set by a state function
	// when the pattern may not end right after that rune.
	doNotTerminate bool
	// groupingCount is the number of digit positions seen since the last
	// call to updateGrouping.
	groupingCount  uint
	// hasGroup reports whether updateGrouping has been called before.
	hasGroup       bool
	// buf collects the affixes, each as a length byte followed by its bytes.
	buf            []byte
}
    99  
   100  func (p *parser) setError(err error) {
   101  	if p.err == nil {
   102  		p.err = err
   103  	}
   104  }
   105  
   106  func (p *parser) updateGrouping() {
   107  	if p.hasGroup && p.groupingCount < 255 {
   108  		p.GroupingSize[1] = p.GroupingSize[0]
   109  		p.GroupingSize[0] = uint8(p.groupingCount)
   110  	}
   111  	p.groupingCount = 0
   112  	p.hasGroup = true
   113  }
   114  
   115  var (
   116  	// TODO: more sensible and localizeable error messages.
   117  	errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
   118  	errInvalidPadSpecifier   = errors.New("format: invalid pad specifier")
   119  	errInvalidQuote          = errors.New("format: invalid quote")
   120  	errAffixTooLarge         = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
   121  	errDuplicatePercentSign  = errors.New("format: duplicate percent sign")
   122  	errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
   123  	errUnexpectedEnd         = errors.New("format: unexpected end of pattern")
   124  )
   125  
   126  // ParseNumberPattern extracts formatting information from a CLDR number
   127  // pattern.
   128  //
   129  // See http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
   130  func ParseNumberPattern(s string) (f *NumberFormat, err error) {
   131  	p := parser{NumberFormat: &NumberFormat{}}
   132  
   133  	s = p.parseSubPattern(s)
   134  
   135  	if s != "" {
   136  		// Parse negative sub pattern.
   137  		if s[0] != ';' {
   138  			p.setError(errors.New("format: error parsing first sub pattern"))
   139  			return nil, p.err
   140  		}
   141  		neg := parser{NumberFormat: &NumberFormat{}} // just for extracting the affixes.
   142  		s = neg.parseSubPattern(s[len(";"):])
   143  		p.NegOffset = uint16(len(p.buf))
   144  		p.buf = append(p.buf, neg.buf...)
   145  	}
   146  	if s != "" {
   147  		p.setError(errors.New("format: spurious characters at end of pattern"))
   148  	}
   149  	if p.err != nil {
   150  		return nil, p.err
   151  	}
   152  	if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
   153  		// No prefix or suffixes.
   154  		p.NegOffset = 0
   155  	} else {
   156  		p.Affix = affix
   157  	}
   158  	return p.NumberFormat, nil
   159  }
   160  
   161  func (p *parser) parseSubPattern(s string) string {
   162  	s = p.parsePad(s, PadBeforePrefix)
   163  	s = p.parseAffix(s)
   164  	s = p.parsePad(s, PadAfterPrefix)
   165  
   166  	s = p.parse(p.number, s)
   167  
   168  	s = p.parsePad(s, PadBeforeSuffix)
   169  	s = p.parseAffix(s)
   170  	s = p.parsePad(s, PadAfterSuffix)
   171  	return s
   172  }
   173  
   174  func (p *parser) parsePad(s string, f NumberFormatFlag) (tail string) {
   175  	if len(s) >= 2 && s[0] == '*' {
   176  		r, sz := utf8.DecodeRuneInString(s[1:])
   177  		if p.PadRune != 0 {
   178  			p.err = errMultiplePadSpecifiers
   179  		} else {
   180  			p.Flags |= f
   181  			p.PadRune = r
   182  		}
   183  		return s[1+sz:]
   184  	}
   185  	return s
   186  }
   187  
   188  func (p *parser) parseAffix(s string) string {
   189  	x := len(p.buf)
   190  	p.buf = append(p.buf, 0) // placeholder for affix length
   191  
   192  	s = p.parse(p.affix, s)
   193  
   194  	n := len(p.buf) - x - 1
   195  	if n > 0xFF {
   196  		p.setError(errAffixTooLarge)
   197  	}
   198  	p.buf[x] = uint8(n)
   199  	return s
   200  }
   201  
// state implements a state transition. It returns the new state. A state
// function may set an error on the parser or may simply return on an incorrect
// token and let the next phase fail.
//
// Returning nil ends the current phase; the rune passed to that call is
// treated as not consumed and is left for the next phase.
type state func(r rune) state
   206  
   207  // parse repeatedly applies a state function on the given string until a
   208  // termination condition is reached.
   209  func (p *parser) parse(fn state, s string) (tail string) {
   210  	for i, r := range s {
   211  		p.doNotTerminate = false
   212  		if fn = fn(r); fn == nil || p.err != nil {
   213  			return s[i:]
   214  		}
   215  		p.FormatWidth++
   216  	}
   217  	if p.doNotTerminate {
   218  		p.setError(errUnexpectedEnd)
   219  	}
   220  	return ""
   221  }
   222  
   223  func (p *parser) affix(r rune) state {
   224  	switch r {
   225  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
   226  		'#', '@', '.', '*', ',', ';':
   227  		return nil
   228  	case '\'':
   229  		return p.escape
   230  	case '%':
   231  		if p.Multiplier != 0 {
   232  			p.setError(errDuplicatePercentSign)
   233  		}
   234  		p.Multiplier = 100
   235  	case '\u2030': // ‰ Per mille
   236  		if p.Multiplier != 0 {
   237  			p.setError(errDuplicatePermilleSign)
   238  		}
   239  		p.Multiplier = 1000
   240  		// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
   241  	}
   242  	p.buf = append(p.buf, string(r)...)
   243  	return p.affix
   244  }
   245  
   246  func (p *parser) escape(r rune) state {
   247  	switch r {
   248  	case '\'':
   249  		return p.affix
   250  	default:
   251  		p.buf = append(p.buf, string(r)...)
   252  	}
   253  	return p.escape
   254  }
   255  
   256  // number parses a number. The BNF says the integer part should always have
   257  // a '0', but that does not appear to be the case according to the rest of the
   258  // documentation. We will allow having only '#' numbers.
   259  func (p *parser) number(r rune) state {
   260  	switch r {
   261  	case '#':
   262  		p.groupingCount++
   263  		p.leadingSharps++
   264  	case '@':
   265  		p.groupingCount++
   266  		p.leadingSharps = 0
   267  		return p.sigDigits(r)
   268  	case ',':
   269  		if p.leadingSharps == 0 { // no leading commas
   270  			return nil
   271  		}
   272  		p.updateGrouping()
   273  	case 'E':
   274  		p.MaxIntegerDigits = uint8(p.leadingSharps)
   275  		return p.exponent
   276  	case '.': // allow ".##" etc.
   277  		p.updateGrouping()
   278  		return p.fraction
   279  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   280  		return p.integer(r)
   281  	default:
   282  		return nil
   283  	}
   284  	return p.number
   285  }
   286  
   287  func (p *parser) integer(r rune) state {
   288  	if !('0' <= r && r <= '9') {
   289  		var next state
   290  		switch r {
   291  		case 'E':
   292  			if p.leadingSharps > 0 {
   293  				p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
   294  			}
   295  			next = p.exponent
   296  		case '.':
   297  			next = p.fraction
   298  		}
   299  		p.updateGrouping()
   300  		return next
   301  	}
   302  	p.RoundIncrement = p.RoundIncrement*10 + uint32(r-'0')
   303  	p.groupingCount++
   304  	p.MinIntegerDigits++
   305  	return p.integer
   306  }
   307  
   308  func (p *parser) sigDigits(r rune) state {
   309  	switch r {
   310  	case '@':
   311  		p.groupingCount++
   312  		p.MaxSignificantDigits++
   313  		p.MinSignificantDigits++
   314  	case '#':
   315  		return p.sigDigitsFinal(r)
   316  	case 'E':
   317  		p.updateGrouping()
   318  		return p.normalizeSigDigitsWithExponent()
   319  	default:
   320  		p.updateGrouping()
   321  		return nil
   322  	}
   323  	return p.sigDigits
   324  }
   325  
   326  func (p *parser) sigDigitsFinal(r rune) state {
   327  	switch r {
   328  	case '#':
   329  		p.groupingCount++
   330  		p.MaxSignificantDigits++
   331  	case 'E':
   332  		p.updateGrouping()
   333  		return p.normalizeSigDigitsWithExponent()
   334  	default:
   335  		p.updateGrouping()
   336  		return nil
   337  	}
   338  	return p.sigDigitsFinal
   339  }
   340  
   341  func (p *parser) normalizeSigDigitsWithExponent() state {
   342  	p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
   343  	p.MinFractionDigits = p.MinSignificantDigits - 1
   344  	p.MaxFractionDigits = p.MaxSignificantDigits - 1
   345  	p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
   346  	return p.exponent
   347  }
   348  
   349  func (p *parser) fraction(r rune) state {
   350  	switch r {
   351  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   352  		p.RoundIncrement = p.RoundIncrement*10 + uint32(r-'0')
   353  		p.MinFractionDigits++
   354  		p.MaxFractionDigits++
   355  	case '#':
   356  		p.MaxFractionDigits++
   357  	case 'E':
   358  		if p.leadingSharps > 0 {
   359  			p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
   360  		}
   361  		return p.exponent
   362  	default:
   363  		return nil
   364  	}
   365  	return p.fraction
   366  }
   367  
   368  func (p *parser) exponent(r rune) state {
   369  	switch r {
   370  	case '+':
   371  		// Set mode and check it wasn't already set.
   372  		if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
   373  			break
   374  		}
   375  		p.Flags |= AlwaysExpSign
   376  		p.doNotTerminate = true
   377  		return p.exponent
   378  	case '0':
   379  		p.MinExponentDigits++
   380  		return p.exponent
   381  	}
   382  	// termination condition
   383  	if p.MinExponentDigits == 0 {
   384  		p.setError(errors.New("format: need at least one digit"))
   385  	}
   386  	return nil
   387  }