github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/golang/text/language/parse.go

github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/golang/text/language/parse.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package language
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"sort"
    12  	"strconv"
    13  	"strings"
    14  
    15  	"github.com/insionng/yougam/libraries/x/text/internal/tag"
    16  )
    17  
    18  // isAlpha returns true if the byte is not a digit.
    19  // b must be an ASCII letter or digit.
    20  func isAlpha(b byte) bool {
    21  	return b > '9'
    22  }
    23  
    24  // isAlphaNum returns true if the string contains only ASCII letters or digits.
    25  func isAlphaNum(s []byte) bool {
    26  	for _, c := range s {
    27  		if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
    28  			return false
    29  		}
    30  	}
    31  	return true
    32  }
    33  
// errSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// It is a sentinel value: the scanner compares errors against it by identity
// to give syntax errors priority over other recorded errors.
// TODO: return the position at which the syntax error occurred?
var errSyntax = errors.New("language: tag is not well-formed")
    38  
    39  // ValueError is returned by any of the parsing functions when the
    40  // input is well-formed but the respective subtag is not recognized
    41  // as a valid value.
    42  type ValueError struct {
    43  	v [8]byte
    44  }
    45  
    46  func mkErrInvalid(s []byte) error {
    47  	var e ValueError
    48  	copy(e.v[:], s)
    49  	return e
    50  }
    51  
    52  func (e ValueError) tag() []byte {
    53  	n := bytes.IndexByte(e.v[:], 0)
    54  	if n == -1 {
    55  		n = 8
    56  	}
    57  	return e.v[:n]
    58  }
    59  
    60  // Error implements the error interface.
    61  func (e ValueError) Error() string {
    62  	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
    63  }
    64  
    65  // Subtag returns the subtag for which the error occurred.
    66  func (e ValueError) Subtag() string {
    67  	return string(e.tag())
    68  }
    69  
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
// The scanner rewrites its buffer in place: underscores become hyphens on
// init and malformed tokens are removed as they are encountered.
type scanner struct {
	// b is the input buffer being scanned and normalized.
	b     []byte
	// bytes is inline storage that makeScannerString uses for inputs that fit.
	bytes [max99thPercentileSize]byte
	// token is the current token; scan leaves it nil when no token remains.
	token []byte
	start int // start position of the current token
	end   int // end position of the current token
	next  int // next point for scan
	// err is the error recorded by setError, if any.
	err   error
	// done is set by scan once the input is exhausted.
	done  bool
}
    81  
    82  func makeScannerString(s string) scanner {
    83  	scan := scanner{}
    84  	if len(s) <= len(scan.bytes) {
    85  		scan.b = scan.bytes[:copy(scan.bytes[:], s)]
    86  	} else {
    87  		scan.b = []byte(s)
    88  	}
    89  	scan.init()
    90  	return scan
    91  }
    92  
    93  // makeScanner returns a scanner using b as the input buffer.
    94  // b is not copied and may be modified by the scanner routines.
    95  func makeScanner(b []byte) scanner {
    96  	scan := scanner{b: b}
    97  	scan.init()
    98  	return scan
    99  }
   100  
   101  func (s *scanner) init() {
   102  	for i, c := range s.b {
   103  		if c == '_' {
   104  			s.b[i] = '-'
   105  		}
   106  	}
   107  	s.scan()
   108  }
   109  
   110  // restToLower converts the string between start and end to lower case.
   111  func (s *scanner) toLower(start, end int) {
   112  	for i := start; i < end; i++ {
   113  		c := s.b[i]
   114  		if 'A' <= c && c <= 'Z' {
   115  			s.b[i] += 'a' - 'A'
   116  		}
   117  	}
   118  }
   119  
   120  func (s *scanner) setError(e error) {
   121  	if s.err == nil || (e == errSyntax && s.err != errSyntax) {
   122  		s.err = e
   123  	}
   124  }
   125  
   126  // resizeRange shrinks or grows the array at position oldStart such that
   127  // a new string of size newSize can fit between oldStart and oldEnd.
   128  // Sets the scan point to after the resized range.
   129  func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
   130  	s.start = oldStart
   131  	if end := oldStart + newSize; end != oldEnd {
   132  		diff := end - oldEnd
   133  		if end < cap(s.b) {
   134  			b := make([]byte, len(s.b)+diff)
   135  			copy(b, s.b[:oldStart])
   136  			copy(b[end:], s.b[oldEnd:])
   137  			s.b = b
   138  		} else {
   139  			s.b = append(s.b[end:], s.b[oldEnd:]...)
   140  		}
   141  		s.next = end + (s.next - s.end)
   142  		s.end = end
   143  	}
   144  }
   145  
   146  // replace replaces the current token with repl.
   147  func (s *scanner) replace(repl string) {
   148  	s.resizeRange(s.start, s.end, len(repl))
   149  	copy(s.b[s.start:], repl)
   150  }
   151  
   152  // gobble removes the current token from the input.
   153  // Caller must call scan after calling gobble.
   154  func (s *scanner) gobble(e error) {
   155  	s.setError(e)
   156  	if s.start == 0 {
   157  		s.b = s.b[:+copy(s.b, s.b[s.next:])]
   158  		s.end = 0
   159  	} else {
   160  		s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
   161  		s.end = s.start - 1
   162  	}
   163  	s.next = s.start
   164  }
   165  
   166  // deleteRange removes the given range from s.b before the current token.
   167  func (s *scanner) deleteRange(start, end int) {
   168  	s.setError(errSyntax)
   169  	s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
   170  	diff := end - start
   171  	s.next -= diff
   172  	s.start -= diff
   173  	s.end -= diff
   174  }
   175  
// scan parses the next token of a BCP 47 string.  Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
//
// On success s.token holds the new token and s.start and s.end delimit it in
// s.b. When no further token is available s.token is left nil, s.done is set
// and a trailing '-' in the buffer, if any, is trimmed with errSyntax
// recorded.
func (s *scanner) scan() (end int) {
	// The return value is the end of the token that was current on entry.
	end = s.end
	s.token = nil
	for s.start = s.next; s.next < len(s.b); {
		i := bytes.IndexByte(s.b[s.next:], '-')
		if i == -1 {
			// No further separator: the token runs to the end of the buffer.
			s.end = len(s.b)
			s.next = len(s.b)
			i = s.end - s.start
		} else {
			s.end = s.next + i
			s.next = s.end + 1
		}
		token := s.b[s.start:s.end]
		if i < 1 || i > 8 || !isAlphaNum(token) {
			// gobble removes the malformed token and resets s.next to
			// s.start, so the loop retries at the same position.
			s.gobble(errSyntax)
			continue
		}
		s.token = token
		return end
	}
	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
		s.setError(errSyntax)
		s.b = s.b[:len(s.b)-1]
	}
	s.done = true
	return end
}
   208  
   209  // acceptMinSize parses multiple tokens of the given size or greater.
   210  // It returns the end position of the last token consumed.
   211  func (s *scanner) acceptMinSize(min int) (end int) {
   212  	end = s.end
   213  	s.scan()
   214  	for ; len(s.token) >= min; s.scan() {
   215  		end = s.end
   216  	}
   217  	return end
   218  }
   219  
   220  // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
   221  // failed it returns an error and any part of the tag that could be parsed.
   222  // If parsing succeeded but an unknown value was found, it returns
   223  // ValueError. The Tag returned in this case is just stripped of the unknown
   224  // value. All other values are preserved. It accepts tags in the BCP 47 format
   225  // and extensions to this standard defined in
   226  // http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
   227  // The resulting tag is canonicalized using the default canonicalization type.
   228  func Parse(s string) (t Tag, err error) {
   229  	return Default.Parse(s)
   230  }
   231  
   232  // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
   233  // failed it returns an error and any part of the tag that could be parsed.
   234  // If parsing succeeded but an unknown value was found, it returns
   235  // ValueError. The Tag returned in this case is just stripped of the unknown
   236  // value. All other values are preserved. It accepts tags in the BCP 47 format
   237  // and extensions to this standard defined in
   238  // http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
   239  // The resulting tag is canonicalized using the the canonicalization type c.
   240  func (c CanonType) Parse(s string) (t Tag, err error) {
   241  	// TODO: consider supporting old-style locale key-value pairs.
   242  	if s == "" {
   243  		return und, errSyntax
   244  	}
   245  	if len(s) <= maxAltTaglen {
   246  		b := [maxAltTaglen]byte{}
   247  		for i, c := range s {
   248  			// Generating invalid UTF-8 is okay as it won't match.
   249  			if 'A' <= c && c <= 'Z' {
   250  				c += 'a' - 'A'
   251  			} else if c == '_' {
   252  				c = '-'
   253  			}
   254  			b[i] = byte(c)
   255  		}
   256  		if t, ok := grandfathered(b); ok {
   257  			return t, nil
   258  		}
   259  	}
   260  	scan := makeScannerString(s)
   261  	t, err = parse(&scan, s)
   262  	t, changed := t.canonicalize(c)
   263  	if changed {
   264  		t.remakeString()
   265  	}
   266  	return t, err
   267  }
   268  
// parse parses the tag held by scan, whose first token must already have been
// scanned, and returns the resulting Tag along with any error recorded on the
// scanner. s is the original input string; it is reused as the Tag's string
// form when tag.Compare reports it equal to the normalized buffer.
func parse(scan *scanner, s string) (t Tag, err error) {
	t = und
	var end int
	if n := len(scan.token); n <= 1 {
		// A first token of at most one character is only accepted when it is
		// 'x', in which case the input is handed to parseExtensions.
		scan.toLower(0, len(scan.b))
		if n == 0 || scan.token[0] != 'x' {
			return t, errSyntax
		}
		end = parseExtensions(scan)
	} else if n >= 4 {
		// A first token of four or more characters is rejected outright.
		return und, errSyntax
	} else { // the usual case
		t, end = parseTag(scan)
		if n := len(scan.token); n == 1 {
			// A one-character token after the core tag starts the extensions.
			t.pExt = uint16(end)
			end = parseExtensions(scan)
		} else if end < len(scan.b) {
			// Whatever follows the parsed part is not well-formed; drop it.
			scan.setError(errSyntax)
			scan.b = scan.b[:end]
		}
	}
	if int(t.pVariant) < len(scan.b) {
		// The buffer extends past position pVariant, so a string form is
		// stored. Prefer the caller's string over a new allocation when the
		// two compare equal.
		if end < len(s) {
			s = s[:end]
		}
		if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
			t.str = s
		} else {
			t.str = string(scan.b)
		}
	} else {
		// Nothing follows position pVariant: no string form is stored and the
		// offsets are cleared.
		t.pVariant, t.pExt = 0, 0
	}
	return t, scan.err
}
   304  
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
//
// The current token of scan is taken as the language subtag. Errors from the
// lookups are recorded on the scanner rather than returned, and tokens whose
// script or region lookup yields the zero ID are removed from the buffer.
func parseTag(scan *scanner) (t Tag, end int) {
	var e error
	// TODO: set an error if an unknown lang, script or region is encountered.
	t.lang, e = getLangID(scan.token)
	scan.setError(e)
	// Rewrite the token with the string form of the language ID found.
	scan.replace(t.lang.String())
	langStart := scan.start
	end = scan.scan()
	for len(scan.token) == 3 && isAlpha(scan.token[0]) {
		// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
		// to a tag of the form <extlang>.
		lang, e := getLangID(scan.token)
		if lang != 0 {
			// Overwrite the primary language with the extlang and move
			// scan.start to just behind it, so that the gobble below removes
			// everything from there up to the end of the extlang token.
			// NOTE(review): this appears to assume lang.String() is three
			// bytes long — confirm.
			t.lang = lang
			copy(scan.b[langStart:], lang.String())
			scan.b[langStart+3] = '-'
			scan.start = langStart + 4
		}
		scan.gobble(e)
		end = scan.scan()
	}
	// A four-character token starting with a letter is treated as the script.
	if len(scan.token) == 4 && isAlpha(scan.token[0]) {
		t.script, e = getScriptID(script, scan.token)
		if t.script == 0 {
			scan.gobble(e)
		}
		end = scan.scan()
	}
	// A token of two or three characters is treated as the region.
	if n := len(scan.token); n >= 2 && n <= 3 {
		t.region, e = getRegionID(scan.token)
		if t.region == 0 {
			scan.gobble(e)
		} else {
			scan.replace(t.region.String())
		}
		end = scan.scan()
	}
	// Everything from the current token onwards is lower-cased before the
	// variants are parsed.
	scan.toLower(scan.start, len(scan.b))
	t.pVariant = byte(end)
	end = parseVariants(scan, end, t)
	t.pExt = uint16(end)
	return t, end
}
   350  
// separator is the subtag separator passed to bytes.Join when tokens are
// reassembled.
var separator = []byte{'-'}
   352  
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
//
// Scanning starts at the current token and continues while tokens are at
// least four characters long. Tokens not found in variantIndex are removed
// from the buffer and reported as a ValueError. If the variants are not in
// strictly increasing variantIndex order they are sorted, de-duplicated and
// written back to the buffer. end is the end position reached before the
// variants; the updated end position is returned. The parameter t is not used.
func parseVariants(scan *scanner, end int, t Tag) int {
	start := scan.start
	// Fixed-size backing arrays for the slices below; append takes over if
	// more than four variants are found.
	varIDBuf := [4]uint8{}
	variantBuf := [4][]byte{}
	varID := varIDBuf[:0]
	variant := variantBuf[:0]
	last := -1
	needSort := false
	for ; len(scan.token) >= 4; scan.scan() {
		// TODO: measure the impact of needing this conversion and redesign
		// the data structure if there is an issue.
		v, ok := variantIndex[string(scan.token)]
		if !ok {
			// unknown variant
			// TODO: allow user-defined variants?
			scan.gobble(mkErrInvalid(scan.token))
			continue
		}
		varID = append(varID, v)
		variant = append(variant, scan.token)
		if !needSort {
			if last < int(v) {
				last = int(v)
			} else {
				// Out of order or repeated index: sorting is needed.
				needSort = true
				// There are no legal combinations of more than 7 variants
				// (and this is by no means a useful sequence).
				const maxVariants = 8
				if len(varID) > maxVariants {
					break
				}
			}
		}
		end = scan.end
	}
	if needSort {
		sort.Sort(variantsSort{varID, variant})
		// Compact the sorted lists in place; k counts the entries kept and l
		// is the index value of the entry kept last.
		k, l := 0, -1
		for i, v := range varID {
			w := int(v)
			if l == w {
				// Remove duplicates.
				continue
			}
			varID[k] = varID[i]
			variant[k] = variant[i]
			k++
			l = w
		}
		if str := bytes.Join(variant[:k], separator); len(str) == 0 {
			end = start - 1
		} else {
			// Replace the scanned variant range with the sorted sequence.
			scan.resizeRange(start, end, len(str))
			copy(scan.b[scan.start:], str)
			end = scan.end
		}
	}
	return end
}
   414  
   415  type variantsSort struct {
   416  	i []uint8
   417  	v [][]byte
   418  }
   419  
   420  func (s variantsSort) Len() int {
   421  	return len(s.i)
   422  }
   423  
   424  func (s variantsSort) Swap(i, j int) {
   425  	s.i[i], s.i[j] = s.i[j], s.i[i]
   426  	s.v[i], s.v[j] = s.v[j], s.v[i]
   427  }
   428  
   429  func (s variantsSort) Less(i, j int) bool {
   430  	return s.i[i] < s.i[j]
   431  }
   432  
   433  type bytesSort [][]byte
   434  
   435  func (b bytesSort) Len() int {
   436  	return len(b)
   437  }
   438  
   439  func (b bytesSort) Swap(i, j int) {
   440  	b[i], b[j] = b[j], b[i]
   441  }
   442  
   443  func (b bytesSort) Less(i, j int) bool {
   444  	return bytes.Compare(b[i], b[j]) == -1
   445  }
   446  
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
//
// The current token must be the one-character token introducing the first
// extension. Extensions other than 'x' are sorted; an 'x' extension ends the
// loop and is placed last.
func parseExtensions(scan *scanner) int {
	start := scan.start
	exts := [][]byte{}
	private := []byte{}
	end := scan.end
	for len(scan.token) == 1 {
		extStart := scan.start
		ext := scan.token[0]
		end = parseExtension(scan)
		extension := scan.b[extStart:end]
		if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
			// Too short to hold the introducing character plus content:
			// record a syntax error and do not keep it.
			scan.setError(errSyntax)
			end = extStart
			continue
		} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
			// The first extension is 'x' or runs to the end of the buffer:
			// nothing needs reordering, so trim and return right away.
			scan.b = scan.b[:end]
			return end
		} else if ext == 'x' {
			// An 'x' extension ends the loop.
			private = extension
			break
		}
		exts = append(exts, extension)
	}
	sort.Sort(bytesSort(exts))
	if len(private) > 0 {
		exts = append(exts, private)
	}
	// Rebuild the extension part of the buffer from the sorted list.
	scan.b = scan.b[:start]
	if len(exts) > 0 {
		scan.b = append(scan.b, bytes.Join(exts, separator)...)
	} else if start > 0 {
		// Strip trailing '-'.
		scan.b = scan.b[:start-1]
	}
	return end
}
   486  
   487  // parseExtension parses a single extension and returns the position of
   488  // the extension end.
   489  func parseExtension(scan *scanner) int {
   490  	start, end := scan.start, scan.end
   491  	switch scan.token[0] {
   492  	case 'u':
   493  		attrStart := end
   494  		scan.scan()
   495  		for last := []byte{}; len(scan.token) > 2; scan.scan() {
   496  			if bytes.Compare(scan.token, last) != -1 {
   497  				// Attributes are unsorted. Start over from scratch.
   498  				p := attrStart + 1
   499  				scan.next = p
   500  				attrs := [][]byte{}
   501  				for scan.scan(); len(scan.token) > 2; scan.scan() {
   502  					attrs = append(attrs, scan.token)
   503  					end = scan.end
   504  				}
   505  				sort.Sort(bytesSort(attrs))
   506  				copy(scan.b[p:], bytes.Join(attrs, separator))
   507  				break
   508  			}
   509  			last = scan.token
   510  			end = scan.end
   511  		}
   512  		var last, key []byte
   513  		for attrEnd := end; len(scan.token) == 2; last = key {
   514  			key = scan.token
   515  			keyEnd := scan.end
   516  			end = scan.acceptMinSize(3)
   517  			// TODO: check key value validity
   518  			if keyEnd == end || bytes.Compare(key, last) != 1 {
   519  				// We have an invalid key or the keys are not sorted.
   520  				// Start scanning keys from scratch and reorder.
   521  				p := attrEnd + 1
   522  				scan.next = p
   523  				keys := [][]byte{}
   524  				for scan.scan(); len(scan.token) == 2; {
   525  					keyStart, keyEnd := scan.start, scan.end
   526  					end = scan.acceptMinSize(3)
   527  					if keyEnd != end {
   528  						keys = append(keys, scan.b[keyStart:end])
   529  					} else {
   530  						scan.setError(errSyntax)
   531  						end = keyStart
   532  					}
   533  				}
   534  				sort.Sort(bytesSort(keys))
   535  				reordered := bytes.Join(keys, separator)
   536  				if e := p + len(reordered); e < end {
   537  					scan.deleteRange(e, end)
   538  					end = e
   539  				}
   540  				copy(scan.b[p:], bytes.Join(keys, separator))
   541  				break
   542  			}
   543  		}
   544  	case 't':
   545  		scan.scan()
   546  		if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
   547  			_, end = parseTag(scan)
   548  			scan.toLower(start, end)
   549  		}
   550  		for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
   551  			end = scan.acceptMinSize(3)
   552  		}
   553  	case 'x':
   554  		end = scan.acceptMinSize(1)
   555  	default:
   556  		end = scan.acceptMinSize(2)
   557  	}
   558  	return end
   559  }
   560  
   561  // Compose creates a Tag from individual parts, which may be of type Tag, Base,
   562  // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
   563  // Base, Script or Region or slice of type Variant or Extension is passed more
   564  // than once, the latter will overwrite the former. Variants and Extensions are
   565  // accumulated, but if two extensions of the same type are passed, the latter
   566  // will replace the former. A Tag overwrites all former values and typically
   567  // only makes sense as the first argument. The resulting tag is returned after
   568  // canonicalizing using the Default CanonType. If one or more errors are
   569  // encountered, one of the errors is returned.
   570  func Compose(part ...interface{}) (t Tag, err error) {
   571  	return Default.Compose(part...)
   572  }
   573  
   574  // Compose creates a Tag from individual parts, which may be of type Tag, Base,
   575  // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
   576  // Base, Script or Region or slice of type Variant or Extension is passed more
   577  // than once, the latter will overwrite the former. Variants and Extensions are
   578  // accumulated, but if two extensions of the same type are passed, the latter
   579  // will replace the former. A Tag overwrites all former values and typically
   580  // only makes sense as the first argument. The resulting tag is returned after
   581  // canonicalizing using CanonType c. If one or more errors are encountered,
   582  // one of the errors is returned.
   583  func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
   584  	var b builder
   585  	if err = b.update(part...); err != nil {
   586  		return und, err
   587  	}
   588  	t, _ = b.tag.canonicalize(c)
   589  
   590  	if len(b.ext) > 0 || len(b.variant) > 0 {
   591  		sort.Sort(sortVariant(b.variant))
   592  		sort.Strings(b.ext)
   593  		if b.private != "" {
   594  			b.ext = append(b.ext, b.private)
   595  		}
   596  		n := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...)
   597  		buf := make([]byte, n)
   598  		p := t.genCoreBytes(buf)
   599  		t.pVariant = byte(p)
   600  		p += appendTokens(buf[p:], b.variant...)
   601  		t.pExt = uint16(p)
   602  		p += appendTokens(buf[p:], b.ext...)
   603  		t.str = string(buf[:p])
   604  	} else if b.private != "" {
   605  		t.str = b.private
   606  		t.remakeString()
   607  	}
   608  	return
   609  }
   610  
// builder accumulates the parts passed to Compose before the final Tag is
// assembled.
type builder struct {
	// tag holds the language, script and region collected so far.
	tag Tag

	private string // the x extension
	// ext holds the extensions other than the x extension.
	ext     []string
	// variant holds the variants collected so far.
	variant []string

	// err is set by update when an empty Variant or Extension is supplied.
	err error
}
   620  
   621  func (b *builder) addExt(e string) {
   622  	if e == "" {
   623  	} else if e[0] == 'x' {
   624  		b.private = e
   625  	} else {
   626  		b.ext = append(b.ext, e)
   627  	}
   628  }
   629  
// errInvalidArgument is recorded on the builder when an empty Variant or
// Extension is supplied to update.
var errInvalidArgument = errors.New("invalid Extension or Variant")
   631  
// update applies the given parts to the builder in order. Parts may be of type
// Tag, Base, Script, Region, Variant, []Variant, Extension, []Extension or
// error; values of any other type are ignored. The returned error is the last
// part of type error, if any.
//
// NOTE(review): b.err, set when an empty Variant or Extension is seen, is not
// returned here, and the results of the recursive update calls for slices are
// discarded — confirm whether callers are expected to inspect b.err.
func (b *builder) update(part ...interface{}) (err error) {
	// replace overwrites the first element of *l that eq considers equal to s
	// and reports whether s has been dealt with. An empty s is flagged as an
	// invalid argument and also reported as dealt with, so that it is not
	// appended by the caller.
	replace := func(l *[]string, s string, eq func(a, b string) bool) bool {
		if s == "" {
			b.err = errInvalidArgument
			return true
		}
		for i, v := range *l {
			if eq(v, s) {
				(*l)[i] = s
				return true
			}
		}
		return false
	}
	for _, x := range part {
		switch v := x.(type) {
		case Tag:
			b.tag.lang = v.lang
			b.tag.region = v.region
			b.tag.script = v.script
			if v.str != "" {
				// The tag carries a string form: take its variants and
				// extensions, replacing whatever was collected before.
				b.variant = nil
				for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; {
					x, s = nextToken(s)
					b.variant = append(b.variant, x)
				}
				b.ext, b.private = nil, ""
				for i, e := int(v.pExt), ""; i < len(v.str); {
					i, e = getExtension(v.str, i)
					b.addExt(e)
				}
			}
		case Base:
			b.tag.lang = v.langID
		case Script:
			b.tag.script = v.scriptID
		case Region:
			b.tag.region = v.regionID
		case Variant:
			// A variant that is already present is not added again.
			if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) {
				b.variant = append(b.variant, v.variant)
			}
		case Extension:
			// Extensions are matched on their first byte, so a later
			// extension of the same type replaces the earlier one.
			if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) {
				b.addExt(v.s)
			}
		case []Variant:
			b.variant = nil
			for _, x := range v {
				b.update(x)
			}
		case []Extension:
			b.ext, b.private = nil, ""
			for _, e := range v {
				b.update(e)
			}
		// TODO: support parsing of raw strings based on morphology or just extensions?
		case error:
			err = v
		}
	}
	return
}
   695  
   696  func tokenLen(token ...string) (n int) {
   697  	for _, t := range token {
   698  		n += len(t) + 1
   699  	}
   700  	return
   701  }
   702  
   703  func appendTokens(b []byte, token ...string) int {
   704  	p := 0
   705  	for _, t := range token {
   706  		b[p] = '-'
   707  		copy(b[p+1:], t)
   708  		p += 1 + len(t)
   709  	}
   710  	return p
   711  }
   712  
   713  type sortVariant []string
   714  
   715  func (s sortVariant) Len() int {
   716  	return len(s)
   717  }
   718  
   719  func (s sortVariant) Swap(i, j int) {
   720  	s[j], s[i] = s[i], s[j]
   721  }
   722  
   723  func (s sortVariant) Less(i, j int) bool {
   724  	return variantIndex[s[i]] < variantIndex[s[j]]
   725  }
   726  
   727  func findExt(list []string, x byte) int {
   728  	for i, e := range list {
   729  		if e[0] == x {
   730  			return i
   731  		}
   732  	}
   733  	return -1
   734  }
   735  
   736  // getExtension returns the name, body and end position of the extension.
   737  func getExtension(s string, p int) (end int, ext string) {
   738  	if s[p] == '-' {
   739  		p++
   740  	}
   741  	if s[p] == 'x' {
   742  		return len(s), s[p:]
   743  	}
   744  	end = nextExtension(s, p)
   745  	return end, s[p:end]
   746  }
   747  
   748  // nextExtension finds the next extension within the string, searching
   749  // for the -<char>- pattern from position p.
   750  // In the fast majority of cases, language tags will have at most
   751  // one extension and extensions tend to be small.
   752  func nextExtension(s string, p int) int {
   753  	for n := len(s) - 3; p < n; {
   754  		if s[p] == '-' {
   755  			if s[p+2] == '-' {
   756  				return p
   757  			}
   758  			p += 3
   759  		} else {
   760  			p++
   761  		}
   762  	}
   763  	return len(s)
   764  }
   765  
// errInvalidWeight is returned by ParseAcceptLanguage when a quality weight
// cannot be parsed.
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
   767  
   768  // ParseAcceptLanguage parses the contents of a Accept-Language header as
   769  // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
   770  // a list of corresponding quality weights. It is more permissive than RFC 2616
   771  // and may return non-nil slices even if the input is not valid.
   772  // The Tags will be sorted by highest weight first and then by first occurrence.
   773  // Tags with a weight of zero will be dropped. An error will be returned if the
   774  // input could not be parsed.
   775  func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
   776  	var entry string
   777  	for s != "" {
   778  		if entry, s = split(s, ','); entry == "" {
   779  			continue
   780  		}
   781  
   782  		entry, weight := split(entry, ';')
   783  
   784  		// Scan the language.
   785  		t, err := Parse(entry)
   786  		if err != nil {
   787  			id, ok := acceptFallback[entry]
   788  			if !ok {
   789  				return nil, nil, err
   790  			}
   791  			t = Tag{lang: id}
   792  		}
   793  
   794  		// Scan the optional weight.
   795  		w := 1.0
   796  		if weight != "" {
   797  			weight = consume(weight, 'q')
   798  			weight = consume(weight, '=')
   799  			// consume returns the empty string when a token could not be
   800  			// consumed, resulting in an error for ParseFloat.
   801  			if w, err = strconv.ParseFloat(weight, 32); err != nil {
   802  				return nil, nil, errInvalidWeight
   803  			}
   804  			// Drop tags with a quality weight of 0.
   805  			if w <= 0 {
   806  				continue
   807  			}
   808  		}
   809  
   810  		tag = append(tag, t)
   811  		q = append(q, float32(w))
   812  	}
   813  	sortStable(&tagSort{tag, q})
   814  	return tag, q, nil
   815  }
   816  
   817  // consume removes a leading token c from s and returns the result or the empty
   818  // string if there is no such token.
   819  func consume(s string, c byte) string {
   820  	if s == "" || s[0] != c {
   821  		return ""
   822  	}
   823  	return strings.TrimSpace(s[1:])
   824  }
   825  
   826  func split(s string, c byte) (head, tail string) {
   827  	if i := strings.IndexByte(s, c); i >= 0 {
   828  		return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
   829  	}
   830  	return strings.TrimSpace(s), ""
   831  }
   832  
// acceptFallback is a hack mapping to deal with a small number of cases that
// occur in Accept-Language (with reasonable frequency). ParseAcceptLanguage
// consults it for entries that Parse rejects.
var acceptFallback = map[string]langID{
	"english": _en,
	"deutsch": _de,
	"italian": _it,
	"french":  _fr,
	"*":       _mul, // defined in the spec to match all languages.
}
   842  
   843  type tagSort struct {
   844  	tag []Tag
   845  	q   []float32
   846  }
   847  
   848  func (s *tagSort) Len() int {
   849  	return len(s.q)
   850  }
   851  
   852  func (s *tagSort) Less(i, j int) bool {
   853  	return s.q[i] > s.q[j]
   854  }
   855  
   856  func (s *tagSort) Swap(i, j int) {
   857  	s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
   858  	s.q[i], s.q[j] = s.q[j], s.q[i]
   859  }