github.com/go-xe2/third@v1.0.3/golang.org/x/text/encoding/ianaindex/ianaindex.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run gen.go
     6  
     7  // Package ianaindex maps names to Encodings as specified by the IANA registry.
     8  // This includes both the MIME and IANA names.
     9  //
    10  // See http://www.iana.org/assignments/character-sets/character-sets.xhtml for
    11  // more details.
    12  package ianaindex
    13  
    14  import (
    15  	"errors"
    16  	"sort"
    17  	"strings"
    18  
    19  	"github.com/go-xe2/third/golang.org/x/text/encoding"
    20  	"github.com/go-xe2/third/golang.org/x/text/encoding/charmap"
    21  	"github.com/go-xe2/third/golang.org/x/text/encoding/internal/identifier"
    22  	"github.com/go-xe2/third/golang.org/x/text/encoding/japanese"
    23  	"github.com/go-xe2/third/golang.org/x/text/encoding/korean"
    24  	"github.com/go-xe2/third/golang.org/x/text/encoding/simplifiedchinese"
    25  	"github.com/go-xe2/third/golang.org/x/text/encoding/traditionalchinese"
    26  	"github.com/go-xe2/third/golang.org/x/text/encoding/unicode"
    27  )
    28  
    29  // TODO: remove the "Status... incomplete" in the package doc comment.
    30  // TODO: allow users to specify their own aliases?
    31  // TODO: allow users to specify their own indexes?
    32  // TODO: allow canonicalizing names
    33  
    34  // NOTE: only use these top-level variables if we can get the linker to drop
    35  // the indexes when they are not used. Make them a function or perhaps only
    36  // support MIME otherwise.
    37  
    38  var (
    39  	// MIME is an index to map MIME names.
    40  	MIME *Index = mime
    41  
    42  	// IANA is an index that supports all names and aliases using IANA names as
    43  	// the canonical identifier.
    44  	IANA *Index = iana
    45  
    46  	// MIB is an index that associates the MIB display name with an Encoding.
    47  	MIB *Index = mib
    48  
    49  	mime = &Index{mimeName, ianaToMIB, ianaAliases, encodings[:]}
    50  	iana = &Index{ianaName, ianaToMIB, ianaAliases, encodings[:]}
    51  	mib  = &Index{mibName, ianaToMIB, ianaAliases, encodings[:]}
    52  )
    53  
// Index maps names registered by IANA to Encodings.
// Currently different Indexes only differ in the names they return for
// encodings. In the future they may also differ in supported aliases.
type Index struct {
	// names returns the name this index reports for position i; Name calls
	// it with the position found in toMIB.
	names func(i int) string
	toMIB []identifier.MIB // Sorted slice of supported MIBs
	// alias maps an encoding name to a position in enc. Encoding consults it
	// with the name as given and then with the lower-cased name.
	alias map[string]int
	// enc holds the Encoding returned by Encoding for each alias position.
	enc []encoding.Encoding
}
    63  
// Sentinel errors returned by the Index methods.
var (
	// errInvalidName is returned by Encoding when the name is not a known
	// alias.
	errInvalidName = errors.New("ianaindex: invalid encoding name")
	// errUnknown is returned by Name when the Encoding does not identify
	// itself or reports a zero MIB.
	errUnknown = errors.New("ianaindex: unknown Encoding")
	// errUnsupported is returned by Name when the Encoding's MIB is not in the
	// index's MIB list.
	errUnsupported = errors.New("ianaindex: unsupported Encoding")
)
    69  
    70  // Encoding returns an Encoding for IANA-registered names. Matching is
    71  // case-insensitive.
    72  func (x *Index) Encoding(name string) (encoding.Encoding, error) {
    73  	name = strings.TrimSpace(name)
    74  	// First try without lowercasing (possibly creating an allocation).
    75  	i, ok := x.alias[name]
    76  	if !ok {
    77  		i, ok = x.alias[strings.ToLower(name)]
    78  		if !ok {
    79  			return nil, errInvalidName
    80  		}
    81  	}
    82  	return x.enc[i], nil
    83  }
    84  
    85  // Name reports the canonical name of the given Encoding. It will return an
    86  // error if the e is not associated with a known encoding scheme.
    87  func (x *Index) Name(e encoding.Encoding) (string, error) {
    88  	id, ok := e.(identifier.Interface)
    89  	if !ok {
    90  		return "", errUnknown
    91  	}
    92  	mib, _ := id.ID()
    93  	if mib == 0 {
    94  		return "", errUnknown
    95  	}
    96  	v := findMIB(x.toMIB, mib)
    97  	if v == -1 {
    98  		return "", errUnsupported
    99  	}
   100  	return x.names(v), nil
   101  }
   102  
   103  // TODO: the coverage of this index is rather spotty. Allowing users to set
   104  // encodings would allow:
   105  // - users to increase coverage
// - allow a partially loaded set of encodings in case the user doesn't need
//   them all.
   108  // - write an OS-specific wrapper for supported encodings and set them.
   109  // The exact definition of Set depends a bit on if and how we want to let users
   110  // write their own Encoding implementations. Also, it is not possible yet to
   111  // only partially load the encodings without doing some refactoring. Until this
   112  // is solved, we might as well not support Set.
   113  // // Set sets the e to be used for the encoding scheme identified by name. Only
   114  // // canonical names may be used. An empty name assigns e to its internally
   115  // // associated encoding scheme.
   116  // func (x *Index) Set(name string, e encoding.Encoding) error {
   117  // 	panic("TODO: implement")
   118  // }
   119  
   120  func findMIB(x []identifier.MIB, mib identifier.MIB) int {
   121  	i := sort.Search(len(x), func(i int) bool { return x[i] >= mib })
   122  	if i < len(x) && x[i] == mib {
   123  		return i
   124  	}
   125  	return -1
   126  }
   127  
// maxMIMENameLen is the largest first-byte value that mimeName and ianaName
// treat as a marker byte rather than as the first character of a name.
const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer.
   129  
   130  func mimeName(x int) string {
   131  	n := ianaNames[x]
   132  	// See gen.go for a description of the encoding.
   133  	if n[0] <= maxMIMENameLen {
   134  		return n[1:n[0]]
   135  	}
   136  	return n
   137  }
   138  
   139  func ianaName(x int) string {
   140  	n := ianaNames[x]
   141  	// See gen.go for a description of the encoding.
   142  	if n[0] <= maxMIMENameLen {
   143  		return n[n[0]:]
   144  	}
   145  	return n
   146  }
   147  
// mibName returns the entry at position x of the mibNames table. It is the
// naming function used by the MIB index.
func mibName(x int) string {
	return mibNames[x]
}
   151  
// encodings is the table of Encoding implementations shared by all indexes,
// keyed by the encN constants (declared elsewhere in the package). Any slot
// that is not listed here keeps the zero value, i.e. a nil Encoding.
var encodings = [numIANA]encoding.Encoding{
	enc106:  unicode.UTF8,
	enc1015: unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
	enc1013: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
	enc1014: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
	enc2028: charmap.CodePage037,
	enc2011: charmap.CodePage437,
	enc2009: charmap.CodePage850,
	enc2010: charmap.CodePage852,
	enc2046: charmap.CodePage855,
	enc2089: charmap.CodePage858,
	enc2048: charmap.CodePage860,
	enc2013: charmap.CodePage862,
	enc2050: charmap.CodePage863,
	enc2052: charmap.CodePage865,
	enc2086: charmap.CodePage866,
	enc2102: charmap.CodePage1047,
	enc2091: charmap.CodePage1140,
	enc4:    charmap.ISO8859_1,
	enc5:    charmap.ISO8859_2,
	enc6:    charmap.ISO8859_3,
	enc7:    charmap.ISO8859_4,
	enc8:    charmap.ISO8859_5,
	enc9:    charmap.ISO8859_6,
	enc81:   charmap.ISO8859_6E,
	enc82:   charmap.ISO8859_6I,
	enc10:   charmap.ISO8859_7,
	enc11:   charmap.ISO8859_8,
	enc84:   charmap.ISO8859_8E,
	enc85:   charmap.ISO8859_8I,
	enc12:   charmap.ISO8859_9,
	enc13:   charmap.ISO8859_10,
	enc109:  charmap.ISO8859_13,
	enc110:  charmap.ISO8859_14,
	enc111:  charmap.ISO8859_15,
	enc112:  charmap.ISO8859_16,
	enc2084: charmap.KOI8R,
	enc2088: charmap.KOI8U,
	enc2027: charmap.Macintosh,
	enc2109: charmap.Windows874,
	enc2250: charmap.Windows1250,
	enc2251: charmap.Windows1251,
	enc2252: charmap.Windows1252,
	enc2253: charmap.Windows1253,
	enc2254: charmap.Windows1254,
	enc2255: charmap.Windows1255,
	enc2256: charmap.Windows1256,
	enc2257: charmap.Windows1257,
	enc2258: charmap.Windows1258,
	enc18:   japanese.EUCJP,
	enc39:   japanese.ISO2022JP,
	enc17:   japanese.ShiftJIS,
	enc38:   korean.EUCKR,
	enc114:  simplifiedchinese.GB18030,
	enc113:  simplifiedchinese.GBK,
	enc2085: simplifiedchinese.HZGB2312,
	enc2026: traditionalchinese.Big5,
}