vitess.io/vitess@v0.16.2/go/mysql/collations/tools/makecolldata/contractions.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"fmt"
    21  	"sort"
    22  
    23  	"vitess.io/vitess/go/mysql/collations/internal/uca"
    24  	"vitess.io/vitess/go/mysql/collations/tools/makecolldata/codegen"
    25  )
    26  
    27  func sortContractionTrie(trie map[rune][]uca.Contraction) (sorted []rune) {
    28  	for cp := range trie {
    29  		sorted = append(sorted, cp)
    30  	}
    31  	sort.Slice(sorted, func(i, j int) bool {
    32  		return sorted[i] < sorted[j]
    33  	})
    34  	return
    35  }
    36  
    37  type weightarray struct {
    38  	name string
    39  	ary  []uint16
    40  }
    41  
    42  func (wa *weightarray) push(weights []uint16) string {
    43  	start := len(wa.ary)
    44  	wa.ary = append(wa.ary, weights...)
    45  	return fmt.Sprintf("%s[%d:%d]", wa.name, start, len(wa.ary))
    46  }
    47  
    48  func (wa *weightarray) print(g *codegen.Generator) {
    49  	g.P("var ", wa.name, " = ", codegen.Array16(wa.ary))
    50  }
    51  
    52  func printContraction1(g *codegen.Generator, wa *weightarray, incont []uca.Contraction, depth int) {
    53  	trie := make(map[rune][]uca.Contraction)
    54  	var leaf *uca.Contraction
    55  	for i := range incont {
    56  		// Ensure local variable since we grab a pointer later
    57  		// potentially.
    58  		cont := incont[i]
    59  		if depth < len(cont.Path) {
    60  			r := cont.Path[depth]
    61  			trie[r] = append(trie[r], cont)
    62  		}
    63  		if depth == len(cont.Path) {
    64  			leaf = &cont // nolint:exportloopref
    65  		}
    66  	}
    67  
    68  	if depth > 1 {
    69  		g.P("b", depth-1, " := b", depth-2, "[width", depth-1, ":]")
    70  	}
    71  	if depth > 0 {
    72  		g.P("cp", depth, ", width", depth, " := cs.DecodeRune(b", depth-1, ")")
    73  	}
    74  
    75  	g.P("switch cp", depth, " {")
    76  
    77  	for _, cp := range sortContractionTrie(trie) {
    78  		g.P("case ", cp, ":")
    79  		cnt := trie[cp]
    80  
    81  		if len(cnt) == 1 && len(cnt[0].Path) == depth+1 {
    82  			weights := wa.push(cnt[0].Weights)
    83  			g.P("return ", weights, ", b", depth-1, "[width", depth, ":], ", depth+1)
    84  		} else {
    85  			printContraction1(g, wa, cnt, depth+1)
    86  		}
    87  	}
    88  
    89  	g.P("}")
    90  
    91  	if leaf != nil {
    92  		weights := wa.push(incont[0].Weights)
    93  		g.P("return ", weights, ", b", depth-1, ", ", depth)
    94  	}
    95  }
    96  
    97  func (g *TableGenerator) printFastContractionsCtx(name string, allContractions []uca.Contraction) {
    98  	trie := make(map[rune][]uca.Contraction)
    99  	for _, cont := range allContractions {
   100  		cp := cont.Path[0]
   101  		trie[cp] = append(trie[cp], cont)
   102  	}
   103  
   104  	g.P("func (", name, ") Find(", PkgCharset, ".Charset, rune, []byte) ([]uint16, []byte, int) {")
   105  	g.P("return nil, nil, 0")
   106  	g.P("}")
   107  
   108  	var mapping = make(map[uint32][]uint16)
   109  	var cp0min, cp1min rune = 0xFFFF, 0xFFFF
   110  	for _, cp1 := range sortContractionTrie(trie) {
   111  		for _, cnt := range trie[cp1] {
   112  			cp0 := cnt.Path[1]
   113  			if cp0 < cp0min {
   114  				cp0min = cp0
   115  			}
   116  			if cp1 < cp1min {
   117  				cp1min = cp1
   118  			}
   119  
   120  			mask := uint32(cp1)<<16 | uint32(cp0)
   121  			mapping[mask] = cnt.Weights
   122  		}
   123  	}
   124  
   125  	g.P("var ", name, "_weights = ", mapping)
   126  
   127  	g.P("func (", name, ") FindContextual(cp1, cp0 rune) []uint16 {")
   128  	g.P("if cp0 < ", cp0min, " || cp1 < ", cp1min, " || cp0 > 0xFFFF || cp1 > 0xFFFF {")
   129  	g.P("return nil")
   130  	g.P("}")
   131  	g.P("return ", name, "_weights[uint32(cp1) << 16 | uint32(cp0)]")
   132  	g.P("}")
   133  }
   134  
   135  func (g *TableGenerator) printContractionsFast(name string, allContractions []uca.Contraction) {
   136  	contextual := false
   137  	for i := range allContractions {
   138  		ctr := &allContractions[i]
   139  		for i := 0; i < len(ctr.Weights)-3; i += 3 {
   140  			if ctr.Weights[i] == 0x0 && ctr.Weights[i+1] == 0x0 && ctr.Weights[i+2] == 0x0 {
   141  				ctr.Weights = ctr.Weights[:i]
   142  				break
   143  			}
   144  		}
   145  		if ctr.Contextual {
   146  			contextual = true
   147  		}
   148  		if contextual {
   149  			if !ctr.Contextual {
   150  				g.Fail("mixed Contextual and non-contextual contractions")
   151  			}
   152  			if len(ctr.Path) != 2 {
   153  				g.Fail("Contextual contraction with Path != 2")
   154  			}
   155  		}
   156  	}
   157  
   158  	g.P("type ", name, " struct{}")
   159  	g.P()
   160  
   161  	if contextual {
   162  		g.printFastContractionsCtx(name, allContractions)
   163  		return
   164  	}
   165  
   166  	var wa = &weightarray{name: name + "_weights"}
   167  
   168  	g.P("func (", name, ") Find(cs ", PkgCharset, ".Charset, cp0 rune, b0 []byte) ([]uint16, []byte, int) {")
   169  	printContraction1(g.Generator, wa, allContractions, 0)
   170  	g.P("return nil, nil, 0")
   171  	g.P("}")
   172  	g.P("func (", name, ") FindContextual(cp1, cp0 rune) []uint16 {")
   173  	g.P("return nil")
   174  	g.P("}")
   175  
   176  	wa.print(g.Generator)
   177  }