github.com/dolthub/go-mysql-server@v0.18.0/sql/encodings/generate/main.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package main
    16  
    17  import (
    18  	"encoding/binary"
    19  	"fmt"
    20  	"hash/fnv"
    21  	"os"
    22  	"sort"
    23  	"strings"
    24  	"unsafe"
    25  
    26  	"golang.org/x/exp/constraints"
    27  )
    28  
// Header is the fixed preamble that main writes at the top of the generated
// ../weightmaps.go file. It contains the license notice, a generated-file
// warning, the package clause and imports of the generated file, and the
// loadWeightsMap helper, which decodes a byte slice made of 8-byte records
// (a big-endian uint32 rune followed by a big-endian uint32 weight) into a map.
var Header = `// Copyright 2023 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// THIS FILE IS GENERATED. DO NOT EDIT BY HAND.

package encodings

import (
	_ "embed"
	"encoding/binary"
	"sync"
)

func loadWeightsMap(m map[rune]int32, bin []byte) {
	for i := 0; i < len(bin); i += 8 {
		m[rune(binary.BigEndian.Uint32(bin[i:]))] = int32(binary.BigEndian.Uint32(bin[i+4:]))
	}
}
`
    59  
    60  func main() {
    61  	// Verify that (sizeof(rune) == sizeof(int32)), just in case a future Go version breaks this assumption
    62  	if unsafe.Sizeof(rune(0)) != unsafe.Sizeof(int32(0)) {
    63  		panic("sizeof(rune) != sizeof(int32)")
    64  	}
    65  
    66  	// Hash the contents of all maps
    67  	for k, v := range WeightMaps {
    68  		runesInMap := SortedMapKeys(v)
    69  		hash := fnv.New64a()
    70  		for _, r := range runesInMap {
    71  			sortOrder := v[r]
    72  			_, _ = hash.Write([]byte{byte(r), byte(r >> 8), byte(r >> 16), byte(r >> 24)})
    73  			_, _ = hash.Write([]byte{byte(sortOrder), byte(sortOrder >> 8), byte(sortOrder >> 16), byte(sortOrder >> 24)})
    74  		}
    75  		FileContentHashes[k] = hash.Sum64()
    76  	}
    77  
    78  	// Check for duplicate weight maps
    79  	weightKeys := SortedMapKeys(WeightMaps)
    80  	allDuplicatedMaps := make(map[string][]string)
    81  	for i := 0; i < len(weightKeys); i++ {
    82  		weightKey := weightKeys[i]
    83  		contentHash := FileContentHashes[weightKey]
    84  		var duplicateKeyNames []string
    85  		for j := len(weightKeys) - 1; j > i; j-- {
    86  			compareWeightKey := weightKeys[j]
    87  			if contentHash == FileContentHashes[compareWeightKey] {
    88  				duplicateKeyNames = append(duplicateKeyNames, compareWeightKey)
    89  				weightKeys = append(weightKeys[:j], weightKeys[j+1:]...)
    90  			}
    91  		}
    92  		sort.Strings(duplicateKeyNames)
    93  		// Find the common prefix of all names if they exist, else concatenate all names
    94  		if len(duplicateKeyNames) > 0 {
    95  			// Grab the duplicated map and delete the first key
    96  			duplicatedMap := WeightMaps[weightKey]
    97  			delete(WeightMaps, weightKey)
    98  			// Find the common prefix and delete the duplicate keys
    99  			prefix, _ := GetCharacterSet(weightKey)
   100  			for _, duplicateKeyName := range duplicateKeyNames {
   101  				delete(WeightMaps, duplicateKeyName)
   102  				prefix = CommonPrefix(prefix, duplicateKeyName)
   103  			}
   104  			// If there is a common prefix then we'll prepend "common_", else concatenate all of the character sets
   105  			if len(prefix) > 0 {
   106  				prefix = "common_" + prefix
   107  			} else {
   108  				allCharsets := make([]string, 0, len(duplicateKeyNames))
   109  				allCharsetsMap := make(map[string]struct{})
   110  				firstCharset, _ := GetCharacterSet(weightKey)
   111  				allCharsets = append(allCharsets, firstCharset)
   112  				allCharsetsMap[firstCharset] = struct{}{}
   113  				for _, duplicateKeyName := range duplicateKeyNames {
   114  					charset, _ := GetCharacterSet(duplicateKeyName)
   115  					// Some duplicate collations may be in the same character set, so we filter those out too
   116  					if _, ok := allCharsetsMap[charset]; !ok {
   117  						allCharsets = append(allCharsets, charset)
   118  						allCharsetsMap[charset] = struct{}{}
   119  					}
   120  				}
   121  				prefix = "common_" + strings.Join(allCharsets, "_")
   122  			}
   123  			// Add the new key to the weight maps
   124  			_, newKey := GetCharacterSet(weightKey)
   125  			newKey = prefix + newKey
   126  			WeightMaps[newKey] = duplicatedMap
   127  			allDuplicatedMaps[newKey] = append([]string{weightKey}, duplicateKeyNames...)
   128  		}
   129  	}
   130  	weightKeys = SortedMapKeys(WeightMaps)
   131  
   132  	// Load the weightmaps file for writing
   133  	gofile, err := os.OpenFile("../weightmaps.go", os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
   134  	if err != nil {
   135  		panic(err)
   136  	}
   137  	defer gofile.Close()
   138  	_, err = fmt.Fprintf(gofile, "%s", Header)
   139  	if err != nil {
   140  		panic(err)
   141  	}
   142  
   143  	// Write all of the keys and their corresponding weight maps to files
   144  	for _, k := range weightKeys {
   145  		v := WeightMaps[k]
   146  		OutputWeights(k, v)
   147  		OutputGoForMap(gofile, k)
   148  	}
   149  
   150  	// Display all of the duplicate maps and their new map name
   151  	duplicates := SortedMapKeys(allDuplicatedMaps)
   152  	for _, duplicate := range duplicates {
   153  		fmt.Printf("%s: [%s]\n", duplicate, strings.Join(allDuplicatedMaps[duplicate], ", "))
   154  	}
   155  }
   156  
   157  func OutputWeights(name string, weights map[rune]int32) {
   158  	binfile, err := os.OpenFile("../"+name+".bin", os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
   159  	if err != nil {
   160  		panic(err)
   161  	}
   162  	defer binfile.Close()
   163  
   164  	keys := SortedMapKeys(weights)
   165  	for _, k := range keys {
   166  		v := weights[k]
   167  		err := binary.Write(binfile, binary.BigEndian, k)
   168  		if err != nil {
   169  			panic(err)
   170  		}
   171  		err = binary.Write(binfile, binary.BigEndian, v)
   172  		if err != nil {
   173  			panic(err)
   174  		}
   175  	}
   176  }
   177  
   178  func OutputGoForMap(gofile *os.File, name string) {
   179  	fmt.Fprintln(gofile)
   180  	fmt.Fprintln(gofile, "//go:embed "+name+".bin")
   181  	fmt.Fprintln(gofile, "var "+name+"_bin []byte // This is generated using the ./generate package.")
   182  	fmt.Fprintln(gofile, "var "+name+"_map = make(map[rune]int32)")
   183  	fmt.Fprintln(gofile, "var "+name+"_once sync.Once")
   184  	fmt.Fprintln(gofile)
   185  	fmt.Fprintln(gofile, "func "+name+"() map[rune]int32 {")
   186  	fmt.Fprintln(gofile, "\t"+name+"_once.Do(func() { loadWeightsMap("+name+"_map, "+name+"_bin) })")
   187  	fmt.Fprintln(gofile, "\treturn "+name+"_map")
   188  	fmt.Fprintln(gofile, "}")
   189  }
   190  
// WeightMaps associates the name of each collation weight table with the table
// itself (a map from rune to weight). The tables are package-level variables
// that are not declared in this part of the file.
//
// main mutates this map: entries whose content hashes are equal are removed and
// replaced by a single entry whose name starts with "common_". Every entry that
// remains is written to its own "../<name>.bin" file.
var WeightMaps = map[string]map[rune]int32{
	"utf16_croatian_ci_Weights":          utf16_croatian_ci_Weights,
	"utf16_czech_ci_Weights":             utf16_czech_ci_Weights,
	"utf16_danish_ci_Weights":            utf16_danish_ci_Weights,
	"utf16_esperanto_ci_Weights":         utf16_esperanto_ci_Weights,
	"utf16_estonian_ci_Weights":          utf16_estonian_ci_Weights,
	"utf16_german2_ci_Weights":           utf16_german2_ci_Weights,
	"utf16_hungarian_ci_Weights":         utf16_hungarian_ci_Weights,
	"utf16_icelandic_ci_Weights":         utf16_icelandic_ci_Weights,
	"utf16_latvian_ci_Weights":           utf16_latvian_ci_Weights,
	"utf16_lithuanian_ci_Weights":        utf16_lithuanian_ci_Weights,
	"utf16_persian_ci_Weights":           utf16_persian_ci_Weights,
	"utf16_polish_ci_Weights":            utf16_polish_ci_Weights,
	"utf16_roman_ci_Weights":             utf16_roman_ci_Weights,
	"utf16_romanian_ci_Weights":          utf16_romanian_ci_Weights,
	"utf16_sinhala_ci_Weights":           utf16_sinhala_ci_Weights,
	"utf16_slovak_ci_Weights":            utf16_slovak_ci_Weights,
	"utf16_slovenian_ci_Weights":         utf16_slovenian_ci_Weights,
	"utf16_spanish2_ci_Weights":          utf16_spanish2_ci_Weights,
	"utf16_spanish_ci_Weights":           utf16_spanish_ci_Weights,
	"utf16_swedish_ci_Weights":           utf16_swedish_ci_Weights,
	"utf16_turkish_ci_Weights":           utf16_turkish_ci_Weights,
	"utf16_unicode_520_ci_Weights":       utf16_unicode_520_ci_Weights,
	"utf16_unicode_ci_Weights":           utf16_unicode_ci_Weights,
	"utf16_vietnamese_ci_Weights":        utf16_vietnamese_ci_Weights,
	"utf32_croatian_ci_Weights":          utf32_croatian_ci_Weights,
	"utf32_czech_ci_Weights":             utf32_czech_ci_Weights,
	"utf32_danish_ci_Weights":            utf32_danish_ci_Weights,
	"utf32_esperanto_ci_Weights":         utf32_esperanto_ci_Weights,
	"utf32_estonian_ci_Weights":          utf32_estonian_ci_Weights,
	"utf32_german2_ci_Weights":           utf32_german2_ci_Weights,
	"utf32_hungarian_ci_Weights":         utf32_hungarian_ci_Weights,
	"utf32_icelandic_ci_Weights":         utf32_icelandic_ci_Weights,
	"utf32_latvian_ci_Weights":           utf32_latvian_ci_Weights,
	"utf32_lithuanian_ci_Weights":        utf32_lithuanian_ci_Weights,
	"utf32_persian_ci_Weights":           utf32_persian_ci_Weights,
	"utf32_polish_ci_Weights":            utf32_polish_ci_Weights,
	"utf32_roman_ci_Weights":             utf32_roman_ci_Weights,
	"utf32_romanian_ci_Weights":          utf32_romanian_ci_Weights,
	"utf32_sinhala_ci_Weights":           utf32_sinhala_ci_Weights,
	"utf32_slovak_ci_Weights":            utf32_slovak_ci_Weights,
	"utf32_slovenian_ci_Weights":         utf32_slovenian_ci_Weights,
	"utf32_spanish2_ci_Weights":          utf32_spanish2_ci_Weights,
	"utf32_spanish_ci_Weights":           utf32_spanish_ci_Weights,
	"utf32_swedish_ci_Weights":           utf32_swedish_ci_Weights,
	"utf32_turkish_ci_Weights":           utf32_turkish_ci_Weights,
	"utf32_unicode_520_ci_Weights":       utf32_unicode_520_ci_Weights,
	"utf32_unicode_ci_Weights":           utf32_unicode_ci_Weights,
	"utf32_vietnamese_ci_Weights":        utf32_vietnamese_ci_Weights,
	"utf8mb3_croatian_ci_Weights":        utf8mb3_croatian_ci_Weights,
	"utf8mb3_czech_ci_Weights":           utf8mb3_czech_ci_Weights,
	"utf8mb3_danish_ci_Weights":          utf8mb3_danish_ci_Weights,
	"utf8mb3_esperanto_ci_Weights":       utf8mb3_esperanto_ci_Weights,
	"utf8mb3_estonian_ci_Weights":        utf8mb3_estonian_ci_Weights,
	"utf8mb3_german2_ci_Weights":         utf8mb3_german2_ci_Weights,
	"utf8mb3_hungarian_ci_Weights":       utf8mb3_hungarian_ci_Weights,
	"utf8mb3_icelandic_ci_Weights":       utf8mb3_icelandic_ci_Weights,
	"utf8mb3_latvian_ci_Weights":         utf8mb3_latvian_ci_Weights,
	"utf8mb3_lithuanian_ci_Weights":      utf8mb3_lithuanian_ci_Weights,
	"utf8mb3_persian_ci_Weights":         utf8mb3_persian_ci_Weights,
	"utf8mb3_polish_ci_Weights":          utf8mb3_polish_ci_Weights,
	"utf8mb3_roman_ci_Weights":           utf8mb3_roman_ci_Weights,
	"utf8mb3_romanian_ci_Weights":        utf8mb3_romanian_ci_Weights,
	"utf8mb3_sinhala_ci_Weights":         utf8mb3_sinhala_ci_Weights,
	"utf8mb3_slovak_ci_Weights":          utf8mb3_slovak_ci_Weights,
	"utf8mb3_slovenian_ci_Weights":       utf8mb3_slovenian_ci_Weights,
	"utf8mb3_spanish2_ci_Weights":        utf8mb3_spanish2_ci_Weights,
	"utf8mb3_spanish_ci_Weights":         utf8mb3_spanish_ci_Weights,
	"utf8mb3_swedish_ci_Weights":         utf8mb3_swedish_ci_Weights,
	"utf8mb3_turkish_ci_Weights":         utf8mb3_turkish_ci_Weights,
	"utf8mb3_unicode_520_ci_Weights":     utf8mb3_unicode_520_ci_Weights,
	"utf8mb3_unicode_ci_Weights":         utf8mb3_unicode_ci_Weights,
	"utf8mb3_vietnamese_ci_Weights":      utf8mb3_vietnamese_ci_Weights,
	"utf8mb4_0900_ai_ci_Weights":         utf8mb4_0900_ai_ci_Weights,
	"utf8mb4_0900_as_ci_Weights":         utf8mb4_0900_as_ci_Weights,
	"utf8mb4_0900_as_cs_Weights":         utf8mb4_0900_as_cs_Weights,
	"utf8mb4_croatian_ci_Weights":        utf8mb4_croatian_ci_Weights,
	"utf8mb4_cs_0900_ai_ci_Weights":      utf8mb4_cs_0900_ai_ci_Weights,
	"utf8mb4_cs_0900_as_cs_Weights":      utf8mb4_cs_0900_as_cs_Weights,
	"utf8mb4_czech_ci_Weights":           utf8mb4_czech_ci_Weights,
	"utf8mb4_da_0900_ai_ci_Weights":      utf8mb4_da_0900_ai_ci_Weights,
	"utf8mb4_da_0900_as_cs_Weights":      utf8mb4_da_0900_as_cs_Weights,
	"utf8mb4_danish_ci_Weights":          utf8mb4_danish_ci_Weights,
	"utf8mb4_de_pb_0900_ai_ci_Weights":   utf8mb4_de_pb_0900_ai_ci_Weights,
	"utf8mb4_de_pb_0900_as_cs_Weights":   utf8mb4_de_pb_0900_as_cs_Weights,
	"utf8mb4_eo_0900_ai_ci_Weights":      utf8mb4_eo_0900_ai_ci_Weights,
	"utf8mb4_eo_0900_as_cs_Weights":      utf8mb4_eo_0900_as_cs_Weights,
	"utf8mb4_es_0900_ai_ci_Weights":      utf8mb4_es_0900_ai_ci_Weights,
	"utf8mb4_es_0900_as_cs_Weights":      utf8mb4_es_0900_as_cs_Weights,
	"utf8mb4_es_trad_0900_ai_ci_Weights": utf8mb4_es_trad_0900_ai_ci_Weights,
	"utf8mb4_es_trad_0900_as_cs_Weights": utf8mb4_es_trad_0900_as_cs_Weights,
	"utf8mb4_esperanto_ci_Weights":       utf8mb4_esperanto_ci_Weights,
	"utf8mb4_estonian_ci_Weights":        utf8mb4_estonian_ci_Weights,
	"utf8mb4_et_0900_ai_ci_Weights":      utf8mb4_et_0900_ai_ci_Weights,
	"utf8mb4_et_0900_as_cs_Weights":      utf8mb4_et_0900_as_cs_Weights,
	"utf8mb4_german2_ci_Weights":         utf8mb4_german2_ci_Weights,
	"utf8mb4_hr_0900_ai_ci_Weights":      utf8mb4_hr_0900_ai_ci_Weights,
	"utf8mb4_hr_0900_as_cs_Weights":      utf8mb4_hr_0900_as_cs_Weights,
	"utf8mb4_hu_0900_ai_ci_Weights":      utf8mb4_hu_0900_ai_ci_Weights,
	"utf8mb4_hu_0900_as_cs_Weights":      utf8mb4_hu_0900_as_cs_Weights,
	"utf8mb4_hungarian_ci_Weights":       utf8mb4_hungarian_ci_Weights,
	"utf8mb4_icelandic_ci_Weights":       utf8mb4_icelandic_ci_Weights,
	"utf8mb4_is_0900_ai_ci_Weights":      utf8mb4_is_0900_ai_ci_Weights,
	"utf8mb4_is_0900_as_cs_Weights":      utf8mb4_is_0900_as_cs_Weights,
	"utf8mb4_ja_0900_as_cs_Weights":      utf8mb4_ja_0900_as_cs_Weights,
	"utf8mb4_ja_0900_as_cs_ks_Weights":   utf8mb4_ja_0900_as_cs_ks_Weights,
	"utf8mb4_la_0900_ai_ci_Weights":      utf8mb4_la_0900_ai_ci_Weights,
	"utf8mb4_la_0900_as_cs_Weights":      utf8mb4_la_0900_as_cs_Weights,
	"utf8mb4_latvian_ci_Weights":         utf8mb4_latvian_ci_Weights,
	"utf8mb4_lithuanian_ci_Weights":      utf8mb4_lithuanian_ci_Weights,
	"utf8mb4_lt_0900_ai_ci_Weights":      utf8mb4_lt_0900_ai_ci_Weights,
	"utf8mb4_lt_0900_as_cs_Weights":      utf8mb4_lt_0900_as_cs_Weights,
	"utf8mb4_lv_0900_ai_ci_Weights":      utf8mb4_lv_0900_ai_ci_Weights,
	"utf8mb4_lv_0900_as_cs_Weights":      utf8mb4_lv_0900_as_cs_Weights,
	"utf8mb4_persian_ci_Weights":         utf8mb4_persian_ci_Weights,
	"utf8mb4_pl_0900_ai_ci_Weights":      utf8mb4_pl_0900_ai_ci_Weights,
	"utf8mb4_pl_0900_as_cs_Weights":      utf8mb4_pl_0900_as_cs_Weights,
	"utf8mb4_polish_ci_Weights":          utf8mb4_polish_ci_Weights,
	"utf8mb4_ro_0900_ai_ci_Weights":      utf8mb4_ro_0900_ai_ci_Weights,
	"utf8mb4_ro_0900_as_cs_Weights":      utf8mb4_ro_0900_as_cs_Weights,
	"utf8mb4_roman_ci_Weights":           utf8mb4_roman_ci_Weights,
	"utf8mb4_romanian_ci_Weights":        utf8mb4_romanian_ci_Weights,
	"utf8mb4_ru_0900_ai_ci_Weights":      utf8mb4_ru_0900_ai_ci_Weights,
	"utf8mb4_ru_0900_as_cs_Weights":      utf8mb4_ru_0900_as_cs_Weights,
	"utf8mb4_sinhala_ci_Weights":         utf8mb4_sinhala_ci_Weights,
	"utf8mb4_sk_0900_ai_ci_Weights":      utf8mb4_sk_0900_ai_ci_Weights,
	"utf8mb4_sk_0900_as_cs_Weights":      utf8mb4_sk_0900_as_cs_Weights,
	"utf8mb4_sl_0900_ai_ci_Weights":      utf8mb4_sl_0900_ai_ci_Weights,
	"utf8mb4_sl_0900_as_cs_Weights":      utf8mb4_sl_0900_as_cs_Weights,
	"utf8mb4_slovak_ci_Weights":          utf8mb4_slovak_ci_Weights,
	"utf8mb4_slovenian_ci_Weights":       utf8mb4_slovenian_ci_Weights,
	"utf8mb4_spanish2_ci_Weights":        utf8mb4_spanish2_ci_Weights,
	"utf8mb4_spanish_ci_Weights":         utf8mb4_spanish_ci_Weights,
	"utf8mb4_sv_0900_ai_ci_Weights":      utf8mb4_sv_0900_ai_ci_Weights,
	"utf8mb4_sv_0900_as_cs_Weights":      utf8mb4_sv_0900_as_cs_Weights,
	"utf8mb4_swedish_ci_Weights":         utf8mb4_swedish_ci_Weights,
	"utf8mb4_tr_0900_ai_ci_Weights":      utf8mb4_tr_0900_ai_ci_Weights,
	"utf8mb4_tr_0900_as_cs_Weights":      utf8mb4_tr_0900_as_cs_Weights,
	"utf8mb4_turkish_ci_Weights":         utf8mb4_turkish_ci_Weights,
	"utf8mb4_unicode_520_ci_Weights":     utf8mb4_unicode_520_ci_Weights,
	"utf8mb4_unicode_ci_Weights":         utf8mb4_unicode_ci_Weights,
	"utf8mb4_vi_0900_ai_ci_Weights":      utf8mb4_vi_0900_ai_ci_Weights,
	"utf8mb4_vi_0900_as_cs_Weights":      utf8mb4_vi_0900_as_cs_Weights,
	"utf8mb4_vietnamese_ci_Weights":      utf8mb4_vietnamese_ci_Weights,
	"utf8mb4_zh_0900_as_cs_Weights":      utf8mb4_zh_0900_as_cs_Weights,
}
   337  
// FileContentHashes holds a 64-bit content hash for each map in WeightMaps,
// keyed by the same name. It starts out empty; main fills it in and then
// compares the hashes to find maps with duplicate contents.
var FileContentHashes = map[string]uint64{}
   339  
   340  func SortedMapKeys[K constraints.Ordered, V any](m map[K]V) []K {
   341  	keys := make([]K, 0, len(m))
   342  	for key := range m {
   343  		keys = append(keys, key)
   344  	}
   345  	sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
   346  	return keys
   347  }
   348  
   349  func CommonPrefix(str1 string, str2 string) string {
   350  	minLen := len(str1)
   351  	if len(str2) < minLen {
   352  		minLen = len(str2)
   353  	}
   354  	i := 0
   355  	for ; i < minLen; i++ {
   356  		if str1[i] != str2[i] {
   357  			break
   358  		}
   359  	}
   360  	return str1[:i]
   361  }
   362  
   363  func GetCharacterSet(str string) (charset string, restOfString string) {
   364  	index := strings.Index(str, "_")
   365  	return str[:index], str[index:]
   366  }