github.com/dolthub/go-mysql-server@v0.18.0/sql/charactersets.go (about)

     1  // Copyright 2022-2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sql
    16  
    17  import (
    18  	"strings"
    19  
    20  	"github.com/dolthub/go-mysql-server/sql/encodings"
    21  )
    22  
// CharacterSet represents the character set of a string. Values of this type are the entries stored in
// characterSetArray; the accessor methods on CharacterSetID read their results from these fields.
type CharacterSet struct {
	// ID identifies the character set and doubles as its index within characterSetArray.
	ID               CharacterSetID
	// Name is the character set's name (for example "utf8mb4").
	Name             string
	// DefaultCollation is the collation used when none is given; the character set's ID equals this collation's ID.
	DefaultCollation CollationID
	// BinaryCollation is the binary ("_bin") collation belonging to this character set.
	BinaryCollation  CollationID
	// Description is a plain-English description of the character set.
	Description      string
	// MaxLength is the maximum size of a single character (presumably measured in bytes -- TODO confirm).
	MaxLength        uint8
	// Encoder is the character set's encoder. It is nil for character sets that do not have one yet.
	Encoder          encodings.Encoder
}
    33  
// CharacterSetsIterator iterates over every character set available. Create one with NewCharacterSetsIterator and
// call Next repeatedly until it reports false.
type CharacterSetsIterator struct {
	// idx is the position within characterSetArray at which the next call to Next resumes scanning.
	idx int
}
    38  
// CharacterSetID represents a character set. Unlike collations, this ID is not intended for storage and may change as
// the default collation changes. It is recommended to use the character set's name if persistence is desired.
//
// The ID is used directly as an index into characterSetArray, which holds 256 entries.
// NOTE(review): the underlying type is uint16, so an ID of 256 or more would index past the end of that array --
// confirm that no such ID can be produced.
type CharacterSetID uint16
    42  
// The character sets below are ordered alphabetically by name (not numerically) to make them easier to scan.
// Each ID is also the index of the character set's entry in `characterSetArray`, which is why every value is written
// out explicitly instead of being generated.
// A character set's ID is defined as the ID of its default collation.

const (
	CharacterSet_armscii8 CharacterSetID = 32
	CharacterSet_ascii    CharacterSetID = 11
	CharacterSet_big5     CharacterSetID = 1
	CharacterSet_binary   CharacterSetID = 63
	CharacterSet_cp1250   CharacterSetID = 26
	CharacterSet_cp1251   CharacterSetID = 51
	CharacterSet_cp1256   CharacterSetID = 57
	CharacterSet_cp1257   CharacterSetID = 59
	CharacterSet_cp850    CharacterSetID = 4
	CharacterSet_cp852    CharacterSetID = 40
	CharacterSet_cp866    CharacterSetID = 36
	CharacterSet_cp932    CharacterSetID = 95
	CharacterSet_dec8     CharacterSetID = 3
	CharacterSet_eucjpms  CharacterSetID = 97
	CharacterSet_euckr    CharacterSetID = 19
	CharacterSet_gb18030  CharacterSetID = 248
	CharacterSet_gb2312   CharacterSetID = 24
	CharacterSet_gbk      CharacterSetID = 28
	CharacterSet_geostd8  CharacterSetID = 92
	CharacterSet_greek    CharacterSetID = 25
	CharacterSet_hebrew   CharacterSetID = 16
	CharacterSet_hp8      CharacterSetID = 6
	CharacterSet_keybcs2  CharacterSetID = 37
	CharacterSet_koi8r    CharacterSetID = 7
	CharacterSet_koi8u    CharacterSetID = 22
	CharacterSet_latin1   CharacterSetID = 8
	CharacterSet_latin2   CharacterSetID = 9
	CharacterSet_latin5   CharacterSetID = 30
	CharacterSet_latin7   CharacterSetID = 41
	CharacterSet_macce    CharacterSetID = 38
	CharacterSet_macroman CharacterSetID = 39
	CharacterSet_sjis     CharacterSetID = 13
	CharacterSet_swe7     CharacterSetID = 10
	CharacterSet_tis620   CharacterSetID = 18
	CharacterSet_ucs2     CharacterSetID = 35
	CharacterSet_ujis     CharacterSetID = 12
	CharacterSet_utf16    CharacterSetID = 54
	CharacterSet_utf16le  CharacterSetID = 56
	CharacterSet_utf32    CharacterSetID = 60
	CharacterSet_utf8mb3  CharacterSetID = 33
	CharacterSet_utf8mb4  CharacterSetID = 255

	// CharacterSet_utf8 is an alias for CharacterSet_utf8mb3.
	CharacterSet_utf8 = CharacterSet_utf8mb3

	// CharacterSet_Unspecified is used when a character set has not been specified, either explicitly or implicitly.
	// This is usually used as an intermediate character set to be later replaced by an analyzer pass or a plan,
	// although it is valid to use it directly. When used, behaves identically to the character set belonging to the
	// default collation, although it will NOT match the aforementioned character set.
	CharacterSet_Unspecified CharacterSetID = 0
)
    98  
    99  // characterSetArray contains the details of every character set, indexed by their ID. This allows for character sets to
   100  // be efficiently passed around (since only an uint16 is needed), while still being able to quickly access all of their
   101  // properties (index lookups are significantly faster than map lookups).
   102  var characterSetArray = [256]CharacterSet{
   103  	/*000*/ {CharacterSet_Unspecified, "", Collation_Unspecified, Collation_Unspecified, "", 0, nil},
   104  	/*001*/ {CharacterSet_big5, "big5", Collation_big5_chinese_ci, Collation_big5_bin, "Big5 Traditional Chinese", 2, nil},
   105  	/*002*/ {},
   106  	/*003*/ {CharacterSet_dec8, "dec8", Collation_dec8_swedish_ci, Collation_dec8_bin, "DEC West European", 1, encodings.Dec8},
   107  	/*004*/ {CharacterSet_cp850, "cp850", Collation_cp850_general_ci, Collation_cp850_bin, "DOS West European", 1, nil},
   108  	/*005*/ {},
   109  	/*006*/ {CharacterSet_hp8, "hp8", Collation_hp8_english_ci, Collation_hp8_bin, "HP West European", 1, nil},
   110  	/*007*/ {CharacterSet_koi8r, "koi8r", Collation_koi8r_general_ci, Collation_koi8r_bin, "KOI8-R Relcom Russian", 1, nil},
   111  	/*008*/ {CharacterSet_latin1, "latin1", Collation_latin1_swedish_ci, Collation_latin1_bin, "cp1252 West European", 1, encodings.Latin1},
   112  	/*009*/ {CharacterSet_latin2, "latin2", Collation_latin2_general_ci, Collation_latin2_bin, "ISO 8859-2 Central European", 1, nil},
   113  	/*010*/ {CharacterSet_swe7, "swe7", Collation_swe7_swedish_ci, Collation_swe7_bin, "7bit Swedish", 1, encodings.Swe7},
   114  	/*011*/ {CharacterSet_ascii, "ascii", Collation_ascii_general_ci, Collation_ascii_bin, "US ASCII", 1, encodings.Ascii},
   115  	/*012*/ {CharacterSet_ujis, "ujis", Collation_ujis_japanese_ci, Collation_ujis_bin, "EUC-JP Japanese", 3, nil},
   116  	/*013*/ {CharacterSet_sjis, "sjis", Collation_sjis_japanese_ci, Collation_sjis_bin, "Shift-JIS Japanese", 2, nil},
   117  	/*014*/ {},
   118  	/*015*/ {},
   119  	/*016*/ {CharacterSet_hebrew, "hebrew", Collation_hebrew_general_ci, Collation_hebrew_bin, "ISO 8859-8 Hebrew", 1, nil},
   120  	/*017*/ {},
   121  	/*018*/ {CharacterSet_tis620, "tis620", Collation_tis620_thai_ci, Collation_tis620_bin, "TIS620 Thai", 1, nil},
   122  	/*019*/ {CharacterSet_euckr, "euckr", Collation_euckr_korean_ci, Collation_euckr_bin, "EUC-KR Korean", 2, nil},
   123  	/*020*/ {},
   124  	/*021*/ {},
   125  	/*022*/ {CharacterSet_koi8u, "koi8u", Collation_koi8u_general_ci, Collation_koi8u_bin, "KOI8-U Ukrainian", 1, nil},
   126  	/*023*/ {},
   127  	/*024*/ {CharacterSet_gb2312, "gb2312", Collation_gb2312_chinese_ci, Collation_gb2312_bin, "GB2312 Simplified Chinese", 2, nil},
   128  	/*025*/ {CharacterSet_greek, "greek", Collation_greek_general_ci, Collation_greek_bin, "ISO 8859-7 Greek", 1, nil},
   129  	/*026*/ {CharacterSet_cp1250, "cp1250", Collation_cp1250_general_ci, Collation_cp1250_bin, "Windows Central European", 1, nil},
   130  	/*027*/ {},
   131  	/*028*/ {CharacterSet_gbk, "gbk", Collation_gbk_chinese_ci, Collation_gbk_bin, "GBK Simplified Chinese", 2, nil},
   132  	/*029*/ {},
   133  	/*030*/ {CharacterSet_latin5, "latin5", Collation_latin5_turkish_ci, Collation_latin5_bin, "ISO 8859-9 Turkish", 1, nil},
   134  	/*031*/ {},
   135  	/*032*/ {CharacterSet_armscii8, "armscii8", Collation_armscii8_general_ci, Collation_armscii8_bin, "ARMSCII-8 Armenian", 1, nil},
   136  	/*033*/ {CharacterSet_utf8mb3, "utf8mb3", Collation_utf8mb3_general_ci, Collation_utf8mb3_bin, "UTF-8 Unicode", 3, encodings.Utf8mb3},
   137  	/*034*/ {},
   138  	/*035*/ {CharacterSet_ucs2, "ucs2", Collation_ucs2_general_ci, Collation_ucs2_bin, "UCS-2 Unicode", 2, nil},
   139  	/*036*/ {CharacterSet_cp866, "cp866", Collation_cp866_general_ci, Collation_cp866_bin, "DOS Russian", 1, nil},
   140  	/*037*/ {CharacterSet_keybcs2, "keybcs2", Collation_keybcs2_general_ci, Collation_keybcs2_bin, "DOS Kamenicky Czech-Slovak", 1, nil},
   141  	/*038*/ {CharacterSet_macce, "macce", Collation_macce_general_ci, Collation_macce_bin, "Mac Central European", 1, nil},
   142  	/*039*/ {CharacterSet_macroman, "macroman", Collation_macroman_general_ci, Collation_macroman_bin, "Mac West European", 1, nil},
   143  	/*040*/ {CharacterSet_cp852, "cp852", Collation_cp852_general_ci, Collation_cp852_bin, "DOS Central European", 1, nil},
   144  	/*041*/ {CharacterSet_latin7, "latin7", Collation_latin7_general_ci, Collation_latin7_bin, "ISO 8859-13 Baltic", 1, encodings.Latin7},
   145  	/*042*/ {},
   146  	/*043*/ {},
   147  	/*044*/ {},
   148  	/*045*/ {},
   149  	/*046*/ {},
   150  	/*047*/ {},
   151  	/*048*/ {},
   152  	/*049*/ {},
   153  	/*050*/ {},
   154  	/*051*/ {CharacterSet_cp1251, "cp1251", Collation_cp1251_general_ci, Collation_cp1251_bin, "Windows Cyrillic", 1, nil},
   155  	/*052*/ {},
   156  	/*053*/ {},
   157  	/*054*/ {CharacterSet_utf16, "utf16", Collation_utf16_general_ci, Collation_utf16_bin, "UTF-16 Unicode", 4, encodings.Utf16},
   158  	/*055*/ {},
   159  	/*056*/ {CharacterSet_utf16le, "utf16le", Collation_utf16le_general_ci, Collation_utf16le_bin, "UTF-16LE Unicode", 4, nil},
   160  	/*057*/ {CharacterSet_cp1256, "cp1256", Collation_cp1256_general_ci, Collation_cp1256_bin, "Windows Arabic", 1, encodings.Cp1256},
   161  	/*058*/ {},
   162  	/*059*/ {CharacterSet_cp1257, "cp1257", Collation_cp1257_general_ci, Collation_cp1257_bin, "Windows Baltic", 1, encodings.Cp1257},
   163  	/*060*/ {CharacterSet_utf32, "utf32", Collation_utf32_general_ci, Collation_utf32_bin, "UTF-32 Unicode", 4, encodings.Utf32},
   164  	/*061*/ {},
   165  	/*062*/ {},
   166  	/*063*/ {CharacterSet_binary, "binary", Collation_binary, Collation_binary, "Binary pseudo charset", 1, encodings.Binary},
   167  	/*064*/ {},
   168  	/*065*/ {},
   169  	/*066*/ {},
   170  	/*067*/ {},
   171  	/*068*/ {},
   172  	/*069*/ {},
   173  	/*070*/ {},
   174  	/*071*/ {},
   175  	/*072*/ {},
   176  	/*073*/ {},
   177  	/*074*/ {},
   178  	/*075*/ {},
   179  	/*076*/ {},
   180  	/*077*/ {},
   181  	/*078*/ {},
   182  	/*079*/ {},
   183  	/*080*/ {},
   184  	/*081*/ {},
   185  	/*082*/ {},
   186  	/*083*/ {},
   187  	/*084*/ {},
   188  	/*085*/ {},
   189  	/*086*/ {},
   190  	/*087*/ {},
   191  	/*088*/ {},
   192  	/*089*/ {},
   193  	/*090*/ {},
   194  	/*091*/ {},
   195  	/*092*/ {CharacterSet_geostd8, "geostd8", Collation_geostd8_general_ci, Collation_geostd8_bin, "GEOSTD8 Georgian", 1, encodings.Geostd8},
   196  	/*093*/ {},
   197  	/*094*/ {},
   198  	/*095*/ {CharacterSet_cp932, "cp932", Collation_cp932_japanese_ci, Collation_cp932_bin, "SJIS for Windows Japanese", 2, nil},
   199  	/*096*/ {},
   200  	/*097*/ {CharacterSet_eucjpms, "eucjpms", Collation_eucjpms_japanese_ci, Collation_eucjpms_bin, "UJIS for Windows Japanese", 3, nil},
   201  	/*098*/ {},
   202  	/*099*/ {},
   203  	/*100*/ {},
   204  	/*101*/ {},
   205  	/*102*/ {},
   206  	/*103*/ {},
   207  	/*104*/ {},
   208  	/*105*/ {},
   209  	/*106*/ {},
   210  	/*107*/ {},
   211  	/*108*/ {},
   212  	/*109*/ {},
   213  	/*110*/ {},
   214  	/*111*/ {},
   215  	/*112*/ {},
   216  	/*113*/ {},
   217  	/*114*/ {},
   218  	/*115*/ {},
   219  	/*116*/ {},
   220  	/*117*/ {},
   221  	/*118*/ {},
   222  	/*119*/ {},
   223  	/*120*/ {},
   224  	/*121*/ {},
   225  	/*122*/ {},
   226  	/*123*/ {},
   227  	/*124*/ {},
   228  	/*125*/ {},
   229  	/*126*/ {},
   230  	/*127*/ {},
   231  	/*128*/ {},
   232  	/*129*/ {},
   233  	/*130*/ {},
   234  	/*131*/ {},
   235  	/*132*/ {},
   236  	/*133*/ {},
   237  	/*134*/ {},
   238  	/*135*/ {},
   239  	/*136*/ {},
   240  	/*137*/ {},
   241  	/*138*/ {},
   242  	/*139*/ {},
   243  	/*140*/ {},
   244  	/*141*/ {},
   245  	/*142*/ {},
   246  	/*143*/ {},
   247  	/*144*/ {},
   248  	/*145*/ {},
   249  	/*146*/ {},
   250  	/*147*/ {},
   251  	/*148*/ {},
   252  	/*149*/ {},
   253  	/*150*/ {},
   254  	/*151*/ {},
   255  	/*152*/ {},
   256  	/*153*/ {},
   257  	/*154*/ {},
   258  	/*155*/ {},
   259  	/*156*/ {},
   260  	/*157*/ {},
   261  	/*158*/ {},
   262  	/*159*/ {},
   263  	/*160*/ {},
   264  	/*161*/ {},
   265  	/*162*/ {},
   266  	/*163*/ {},
   267  	/*164*/ {},
   268  	/*165*/ {},
   269  	/*166*/ {},
   270  	/*167*/ {},
   271  	/*168*/ {},
   272  	/*169*/ {},
   273  	/*170*/ {},
   274  	/*171*/ {},
   275  	/*172*/ {},
   276  	/*173*/ {},
   277  	/*174*/ {},
   278  	/*175*/ {},
   279  	/*176*/ {},
   280  	/*177*/ {},
   281  	/*178*/ {},
   282  	/*179*/ {},
   283  	/*180*/ {},
   284  	/*181*/ {},
   285  	/*182*/ {},
   286  	/*183*/ {},
   287  	/*184*/ {},
   288  	/*185*/ {},
   289  	/*186*/ {},
   290  	/*187*/ {},
   291  	/*188*/ {},
   292  	/*189*/ {},
   293  	/*100*/ {},
   294  	/*191*/ {},
   295  	/*192*/ {},
   296  	/*193*/ {},
   297  	/*194*/ {},
   298  	/*195*/ {},
   299  	/*196*/ {},
   300  	/*197*/ {},
   301  	/*198*/ {},
   302  	/*199*/ {},
   303  	/*200*/ {},
   304  	/*201*/ {},
   305  	/*202*/ {},
   306  	/*203*/ {},
   307  	/*204*/ {},
   308  	/*205*/ {},
   309  	/*206*/ {},
   310  	/*207*/ {},
   311  	/*208*/ {},
   312  	/*209*/ {},
   313  	/*210*/ {},
   314  	/*211*/ {},
   315  	/*212*/ {},
   316  	/*213*/ {},
   317  	/*214*/ {},
   318  	/*215*/ {},
   319  	/*216*/ {},
   320  	/*217*/ {},
   321  	/*218*/ {},
   322  	/*219*/ {},
   323  	/*220*/ {},
   324  	/*221*/ {},
   325  	/*222*/ {},
   326  	/*223*/ {},
   327  	/*224*/ {},
   328  	/*225*/ {},
   329  	/*226*/ {},
   330  	/*227*/ {},
   331  	/*228*/ {},
   332  	/*229*/ {},
   333  	/*230*/ {},
   334  	/*231*/ {},
   335  	/*232*/ {},
   336  	/*233*/ {},
   337  	/*234*/ {},
   338  	/*235*/ {},
   339  	/*236*/ {},
   340  	/*237*/ {},
   341  	/*238*/ {},
   342  	/*239*/ {},
   343  	/*240*/ {},
   344  	/*241*/ {},
   345  	/*242*/ {},
   346  	/*243*/ {},
   347  	/*244*/ {},
   348  	/*245*/ {},
   349  	/*246*/ {},
   350  	/*247*/ {},
   351  	/*248*/ {CharacterSet_gb18030, "gb18030", Collation_gb18030_chinese_ci, Collation_gb18030_bin, "China National Standard GB18030", 4, nil},
   352  	/*249*/ {},
   353  	/*250*/ {},
   354  	/*251*/ {},
   355  	/*252*/ {},
   356  	/*253*/ {},
   357  	/*254*/ {},
   358  	/*255*/ {CharacterSet_utf8mb4, "utf8mb4", Collation_utf8mb4_0900_ai_ci, Collation_utf8mb4_bin, "UTF-8 Unicode", 4, encodings.Utf8mb4},
   359  }
   360  
   361  // init is used to set the unspecified character set's details to match those of the default collation's character set.
   362  func init() {
   363  	defaultCharacterSet := characterSetArray[Collation_Default.CharacterSet()]
   364  	characterSetArray[0].Name = defaultCharacterSet.Name
   365  	characterSetArray[0].Description = defaultCharacterSet.Description
   366  	characterSetArray[0].MaxLength = defaultCharacterSet.MaxLength
   367  	characterSetArray[0].Encoder = defaultCharacterSet.Encoder
   368  }
   369  
// characterSetStringToID maps a character set's name to its ID. ParseCharacterSet lowercases its input before
// looking it up here, so every key is written in lowercase. Besides the canonical names, "utf8" is accepted as an
// alias that resolves to utf8mb3.
var characterSetStringToID = map[string]CharacterSetID{
	"armscii8": CharacterSet_armscii8,
	"ascii":    CharacterSet_ascii,
	"big5":     CharacterSet_big5,
	"binary":   CharacterSet_binary,
	"cp1250":   CharacterSet_cp1250,
	"cp1251":   CharacterSet_cp1251,
	"cp1256":   CharacterSet_cp1256,
	"cp1257":   CharacterSet_cp1257,
	"cp850":    CharacterSet_cp850,
	"cp852":    CharacterSet_cp852,
	"cp866":    CharacterSet_cp866,
	"cp932":    CharacterSet_cp932,
	"dec8":     CharacterSet_dec8,
	"eucjpms":  CharacterSet_eucjpms,
	"euckr":    CharacterSet_euckr,
	"gb18030":  CharacterSet_gb18030,
	"gb2312":   CharacterSet_gb2312,
	"gbk":      CharacterSet_gbk,
	"geostd8":  CharacterSet_geostd8,
	"greek":    CharacterSet_greek,
	"hebrew":   CharacterSet_hebrew,
	"hp8":      CharacterSet_hp8,
	"keybcs2":  CharacterSet_keybcs2,
	"koi8r":    CharacterSet_koi8r,
	"koi8u":    CharacterSet_koi8u,
	"latin1":   CharacterSet_latin1,
	"latin2":   CharacterSet_latin2,
	"latin5":   CharacterSet_latin5,
	"latin7":   CharacterSet_latin7,
	"macce":    CharacterSet_macce,
	"macroman": CharacterSet_macroman,
	"sjis":     CharacterSet_sjis,
	"swe7":     CharacterSet_swe7,
	"tis620":   CharacterSet_tis620,
	"ucs2":     CharacterSet_ucs2,
	"ujis":     CharacterSet_ujis,
	"utf16":    CharacterSet_utf16,
	"utf16le":  CharacterSet_utf16le,
	"utf32":    CharacterSet_utf32,
	// "utf8" is an alias: it shares the ID of "utf8mb3".
	"utf8":     CharacterSet_utf8mb3,
	"utf8mb3":  CharacterSet_utf8mb3,
	"utf8mb4":  CharacterSet_utf8mb4,
}
   415  
// SupportedCharsets contains all non-binary character sets that are currently supported. At present the list holds
// only utf8mb4.
var SupportedCharsets = []CharacterSetID{
	CharacterSet_utf8mb4,
}
   420  
   421  // ParseCharacterSet takes in a string representing a CharacterSet and returns the result if a match is found, or an
   422  // error if not.
   423  func ParseCharacterSet(str string) (CharacterSetID, error) {
   424  	if cs, ok := characterSetStringToID[strings.ToLower(str)]; ok {
   425  		return cs, nil
   426  	}
   427  	// It is valid recognize an empty string as the invalid charset, as some analyzer steps may temporarily use the
   428  	// invalid charset
   429  	if str == "" {
   430  		return CharacterSet_Unspecified, nil
   431  	}
   432  	return CharacterSet_Unspecified, ErrCharSetUnknown.New(str)
   433  }
   434  
   435  // Name returns the name of this CharacterSet.
   436  func (cs CharacterSetID) Name() string {
   437  	return characterSetArray[cs].Name
   438  }
   439  
   440  // DefaultCollation returns the default CollationID for this CharacterSet.
   441  func (cs CharacterSetID) DefaultCollation() CollationID {
   442  	return characterSetArray[cs].DefaultCollation
   443  }
   444  
   445  // BinaryCollation returns the binary CollationID for this CharacterSet.
   446  func (cs CharacterSetID) BinaryCollation() CollationID {
   447  	return characterSetArray[cs].BinaryCollation
   448  }
   449  
   450  // Description returns the plain-English description of the CharacterSet.
   451  func (cs CharacterSetID) Description() string {
   452  	return characterSetArray[cs].Description
   453  }
   454  
   455  // MaxLength returns the maximum size of a single character in the CharacterSet.
   456  func (cs CharacterSetID) MaxLength() int64 {
   457  	return int64(characterSetArray[cs].MaxLength)
   458  }
   459  
   460  // String returns the string representation of the CharacterSet.
   461  func (cs CharacterSetID) String() string {
   462  	return characterSetArray[cs].Name
   463  }
   464  
   465  // Encoder returns this CharacterSet's encoder. As character sets are a work-in-progress, it is
   466  // recommended to check if it is nil before allowing the character set to be set within a table.
   467  func (cs CharacterSetID) Encoder() encodings.Encoder {
   468  	return characterSetArray[cs].Encoder
   469  }
   470  
   471  // NewCharacterSetsIterator returns a new CharacterSetsIterator.
   472  func NewCharacterSetsIterator() *CharacterSetsIterator {
   473  	return &CharacterSetsIterator{0}
   474  }
   475  
   476  // Next returns the next character set. If all character sets have been iterated over, returns false.
   477  func (csi *CharacterSetsIterator) Next() (CharacterSet, bool) {
   478  	for ; csi.idx < len(characterSetArray); csi.idx++ {
   479  		if characterSetArray[csi.idx].ID == 0 {
   480  			continue
   481  		}
   482  		csi.idx++
   483  		return characterSetArray[csi.idx-1], true
   484  	}
   485  	return CharacterSet{}, false
   486  }