vitess.io/vitess@v0.16.2/go/mysql/collations/tools/makecolldata/mysqlversions.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"bufio"
    21  	"fmt"
    22  	"log"
    23  	"os"
    24  	"path"
    25  	"path/filepath"
    26  	"sort"
    27  	"strconv"
    28  	"strings"
    29  	"unicode"
    30  
    31  	"github.com/spf13/pflag"
    32  
    33  	"vitess.io/vitess/go/mysql/collations/tools/makecolldata/codegen"
    34  )
    35  
// versionInfo accumulates everything makeversions learns about a single
// collation ID across all of the per-version collation dumps.
//
//   - id is the numeric collation ID parsed from the dump.
//   - alias maps every name this ID is known by to a bitmask of the versions
//     that use that name; bit n corresponds to the n-th version file in
//     sorted order.
//   - isdefault is a bitmask, with the same bit layout, of the versions whose
//     IS_DEFAULT column is "Yes" for this collation.
//
// Both masks are a single byte, so at most 8 versions can be represented.
type versionInfo struct {
	id        uint
	alias     map[string]byte
	isdefault byte
}
    41  
// alias is a single (version mask, name) entry taken from versionInfo.alias.
// makeversions flattens the map into a slice of these so the entries can be
// sorted by name before they are written to the generated file.
//
// The field order matters: values are built with a positional literal.
type alias struct {
	mask byte
	name string
}
    46  
// CharsetAliases maps each alias that MySQL uses to refer to a charset to the
// charset name it stands for.
// As of MySQL 8, all versions of MySQL map the utf8 charset to utf8mb3;
// this will be changed sometime in the future so it maps to utf8mb4.
//
// makeversions uses this table in two ways: every collation whose name starts
// with "<alias>_" is also registered under the "<charset>_" spelling, and the
// map itself is embedded verbatim in the generated charsetAliases method.
var CharsetAliases = map[string]string{
	"utf8": "utf8mb3",
}
    53  
    54  func makeversions(output string) {
    55  	pflag.Parse()
    56  
    57  	versionfiles, err := filepath.Glob("testdata/versions/collations_*.csv")
    58  	if err != nil {
    59  		log.Fatal(err)
    60  	}
    61  	sort.Strings(versionfiles)
    62  
    63  	versioninfo := make(map[uint]*versionInfo)
    64  	for v, versionCsv := range versionfiles {
    65  		f, err := os.Open(versionCsv)
    66  		if err != nil {
    67  			log.Fatal(err)
    68  		}
    69  
    70  		scan := bufio.NewScanner(f)
    71  		var row int
    72  		for scan.Scan() {
    73  			if row == 0 {
    74  				row++
    75  				continue
    76  			}
    77  
    78  			cols := strings.Split(scan.Text(), "\t")
    79  			collid, err := strconv.ParseUint(cols[2], 10, 16)
    80  			if err != nil {
    81  				log.Fatal(err)
    82  			}
    83  
    84  			vi := versioninfo[uint(collid)]
    85  			if vi == nil {
    86  				vi = &versionInfo{id: uint(collid), alias: make(map[string]byte)}
    87  				versioninfo[uint(collid)] = vi
    88  			}
    89  
    90  			collname := cols[0]
    91  			vi.alias[collname] |= 1 << v
    92  
    93  			for from, to := range CharsetAliases {
    94  				if strings.HasPrefix(collname, from+"_") {
    95  					aliased := strings.Replace(collname, from+"_", to+"_", 1)
    96  					vi.alias[aliased] |= 1 << v
    97  				}
    98  			}
    99  
   100  			switch cols[3] {
   101  			case "Yes":
   102  				vi.isdefault |= 1 << v
   103  			case "No", "":
   104  			default:
   105  				log.Fatalf("unknown value for IS_DEFAULT: %q", cols[3])
   106  			}
   107  
   108  			row++
   109  		}
   110  	}
   111  
   112  	var versions []string
   113  	for _, versionCsv := range versionfiles {
   114  		base := filepath.Base(versionCsv)
   115  		base = strings.TrimPrefix(base, "collations_")
   116  		base = strings.TrimSuffix(base, ".csv")
   117  		versions = append(versions, base)
   118  	}
   119  
   120  	var g = codegen.NewGenerator("vitess.io/vitess/go/mysql/collations")
   121  	g.P("type collver byte")
   122  	g.P("type collalias struct { mask collver; name string }")
   123  	g.P("const (")
   124  	g.P("collverInvalid collver = 0")
   125  	for n, version := range versions {
   126  		g.P("collver", version, " collver = 1 << ", n)
   127  	}
   128  	g.P(")")
   129  	g.P()
   130  	g.P("func (v collver) String() string {")
   131  	g.P("switch v {")
   132  	g.P("case collverInvalid: return \"Invalid\"")
   133  	for _, cv := range versions {
   134  		vi := strings.IndexFunc(cv, unicode.IsNumber)
   135  		database := cv[:vi]
   136  		version, _ := strconv.Atoi(cv[vi:])
   137  		toString := fmt.Sprintf("%s %.1f", database, float64(version)/10.0)
   138  
   139  		g.P("case collver", cv, ": return ", codegen.Quote(toString))
   140  	}
   141  	g.P("default: panic(\"invalid version identifier\")")
   142  	g.P("}")
   143  	g.P("}")
   144  
   145  	// These are the default aliases for charsets; right now they're common between
   146  	// all MySQL versions, but this is implemented as a method on `collver` so when
   147  	// MySQL maps utf8 to utfmb4, we can perform the mapping only for the specific
   148  	// MySQL version onwards.
   149  	g.P("func (v collver) charsetAliases() map[string]string { return ", fmt.Sprintf("%#v", CharsetAliases), "}")
   150  	g.P()
   151  	g.P("var globalVersionInfo = map[ID]struct{alias []collalias; isdefault collver}{")
   152  
   153  	var sorted []*versionInfo
   154  	for _, vi := range versioninfo {
   155  		sorted = append(sorted, vi)
   156  	}
   157  	sort.Slice(sorted, func(i, j int) bool {
   158  		return sorted[i].id < sorted[j].id
   159  	})
   160  	for _, vi := range sorted {
   161  		var reverse []alias
   162  		for a, m := range vi.alias {
   163  			reverse = append(reverse, alias{m, a})
   164  		}
   165  		sort.Slice(reverse, func(i, j int) bool {
   166  			return reverse[i].name < reverse[j].name
   167  		})
   168  		fmt.Fprintf(g, "%d: {alias: []collalias{", vi.id)
   169  		for _, a := range reverse {
   170  			fmt.Fprintf(g, "{0b%08b, %q},", a.mask, a.name)
   171  		}
   172  		fmt.Fprintf(g, "}, isdefault: 0b%08b},\n", vi.isdefault)
   173  	}
   174  	g.P("}")
   175  
   176  	g.WriteToFile(path.Join(output, "mysqlversion.go"))
   177  }