vitess.io/vitess@v0.16.2/go/mysql/collations/integration/collations_test.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package integration
    18  
    19  import (
    20  	"bufio"
    21  	"bytes"
    22  	"encoding/hex"
    23  	"fmt"
    24  	"os"
    25  	"path"
    26  	"path/filepath"
    27  	"strings"
    28  	"testing"
    29  
    30  	"github.com/spf13/pflag"
    31  	"github.com/stretchr/testify/require"
    32  	"golang.org/x/text/encoding/unicode/utf32"
    33  
    34  	"vitess.io/vitess/go/mysql"
    35  	"vitess.io/vitess/go/mysql/collations"
    36  	"vitess.io/vitess/go/mysql/collations/remote"
    37  	"vitess.io/vitess/go/sqltypes"
    38  	"vitess.io/vitess/go/vt/servenv"
    39  	"vitess.io/vitess/go/vt/sqlparser"
    40  )
    41  
    42  var collationEnv *collations.Environment
    43  
    44  func init() {
    45  	// We require MySQL 8.0 collations for the comparisons in the tests
    46  	mySQLVersion := "8.0.0"
    47  	servenv.SetMySQLServerVersionForTest(mySQLVersion)
    48  	collationEnv = collations.NewEnvironment(mySQLVersion)
    49  }
    50  
    51  func getSQLQueries(t *testing.T, testfile string) []string {
    52  	tf, err := os.Open(testfile)
    53  	if err != nil {
    54  		t.Fatal(err)
    55  	}
    56  	defer tf.Close()
    57  
    58  	var chunks []string
    59  	var curchunk bytes.Buffer
    60  
    61  	addchunk := func() {
    62  		if curchunk.Len() > 0 {
    63  			stmts, err := sqlparser.SplitStatementToPieces(curchunk.String())
    64  			if err != nil {
    65  				t.Fatal(err)
    66  			}
    67  			chunks = append(chunks, stmts...)
    68  			curchunk.Reset()
    69  		}
    70  	}
    71  
    72  	scanner := bufio.NewScanner(tf)
    73  	for scanner.Scan() {
    74  		if strings.HasPrefix(scanner.Text(), "--") {
    75  			addchunk()
    76  			chunks = append(chunks, scanner.Text())
    77  		} else {
    78  			if curchunk.Len() > 0 {
    79  				curchunk.WriteByte(' ')
    80  			}
    81  			curchunk.Write(scanner.Bytes())
    82  		}
    83  	}
    84  	addchunk()
    85  	return chunks
    86  }
    87  
// TestOnResults is implemented by checks that validate the result of a single
// SQL query. processSQLTest applies the currently selected check to the result
// of the next query it executes.
type TestOnResults interface {
	// Test inspects result and reports any failures through t.
	Test(t *testing.T, result *sqltypes.Result)
}
    91  
// uca900CollationTest is the TestOnResults check created for a
// "--test:uca0900 <collation>" directive. It compares locally computed weight
// strings against the ones found in a query result.
type uca900CollationTest struct {
	// collation is the name used to look the collation up in collationEnv.
	collation string
}
    95  
// defaultUtf32 is the big-endian UTF-32 encoding, configured with
// utf32.IgnoreBOM, whose decoder parseUtf32cp uses to convert codepoints.
var defaultUtf32 = utf32.UTF32(utf32.BigEndian, utf32.IgnoreBOM)
    97  
    98  func parseUtf32cp(b []byte) []byte {
    99  	var hexbuf [16]byte
   100  	c, err := hex.Decode(hexbuf[:], b)
   101  	if err != nil {
   102  		return nil
   103  	}
   104  	utf8, _ := defaultUtf32.NewDecoder().Bytes(hexbuf[:c])
   105  	return utf8
   106  }
   107  
   108  func parseWeightString(b []byte) []byte {
   109  	dst := make([]byte, hex.DecodedLen(len(b)))
   110  	n, err := hex.Decode(dst, b)
   111  	if err != nil {
   112  		return nil
   113  	}
   114  	return dst[:n]
   115  }
   116  
   117  func (u *uca900CollationTest) Test(t *testing.T, result *sqltypes.Result) {
   118  	coll := collationEnv.LookupByName(u.collation)
   119  	require.NotNil(t, coll, "unknown collation %q", u.collation)
   120  
   121  	var checked, errors int
   122  	for _, row := range result.Rows {
   123  		if row[1].Len() == 0 {
   124  			continue
   125  		}
   126  		rowBytes, err := row[0].ToBytes()
   127  		require.NoError(t, err)
   128  		utf8Input := parseUtf32cp(rowBytes)
   129  		if utf8Input == nil {
   130  			t.Errorf("[%s] failed to parse UTF32-encoded codepoint: %s (%s)", u.collation, row[0], row[2].ToString())
   131  			errors++
   132  			continue
   133  		}
   134  		rowBytes, err = row[1].ToBytes()
   135  		require.NoError(t, err)
   136  		expectedWeightString := parseWeightString(rowBytes)
   137  		if expectedWeightString == nil {
   138  			t.Errorf("[%s] failed to parse weight string: %s (%s)", u.collation, row[1], row[2].ToString())
   139  			errors++
   140  			continue
   141  		}
   142  
   143  		weightString := coll.WeightString(make([]byte, 0, 128), utf8Input, 0)
   144  		if !bytes.Equal(weightString, expectedWeightString) {
   145  			t.Errorf("[%s] mismatch for %s (%v): \n\twant: %v\n\tgot:  %v", u.collation, row[2].ToString(), utf8Input, expectedWeightString, weightString)
   146  			errors++
   147  		}
   148  		checked++
   149  	}
   150  
   151  	t.Logf("uca900CollationTest[%s]: checked %d codepoints, %d failed (%.02f%%)", u.collation, checked, errors, float64(errors)/float64(checked)*100.0)
   152  }
   153  
   154  func processSQLTest(t *testing.T, testfile string, conn *mysql.Conn) {
   155  	var curtest TestOnResults
   156  
   157  	for _, query := range getSQLQueries(t, testfile) {
   158  		if strings.HasPrefix(query, "--") {
   159  			switch {
   160  			case strings.HasPrefix(query, "--source "):
   161  				include := strings.TrimPrefix(query, "--source ")
   162  				include = path.Join("testdata/mysqltest", include)
   163  				processSQLTest(t, include, conn)
   164  
   165  			case strings.HasPrefix(query, "--test:uca0900 "):
   166  				collation := strings.TrimPrefix(query, "--test:uca0900 ")
   167  				curtest = &uca900CollationTest{collation}
   168  
   169  			case query == "--disable_warnings" || query == "--enable_warnings":
   170  			case query == "--disable_query_log" || query == "--enable_query_log":
   171  
   172  			default:
   173  				t.Logf("unsupported statement: %q", query)
   174  			}
   175  			continue
   176  		}
   177  
   178  		res := exec(t, conn, query)
   179  		if curtest != nil {
   180  			curtest.Test(t, res)
   181  			curtest = nil
   182  		}
   183  	}
   184  }
   185  
// testOneCollation holds the value of the "test-one-collation" flag. When it is
// non-empty, TestCollationsOnMysqld runs only the test file with that name
// instead of every file in the collations suite.
var testOneCollation = pflag.String("test-one-collation", "", "")
   187  
   188  func TestCollationsOnMysqld(t *testing.T) {
   189  	conn := mysqlconn(t)
   190  	defer conn.Close()
   191  
   192  	if *testOneCollation != "" {
   193  		processSQLTest(t, fmt.Sprintf("testdata/mysqltest/suite/collations/%s.test", *testOneCollation), conn)
   194  		return
   195  	}
   196  
   197  	testfiles, _ := filepath.Glob("testdata/mysqltest/suite/collations/*.test")
   198  	for _, testfile := range testfiles {
   199  		t.Run(testfile, func(t *testing.T) {
   200  			processSQLTest(t, testfile, conn)
   201  		})
   202  	}
   203  }
   204  
   205  func TestRemoteKanaSensitivity(t *testing.T) {
   206  	var Kana1 = []byte("の東京ノ")
   207  	var Kana2 = []byte("ノ東京の")
   208  
   209  	testRemoteComparison(t, nil, []testcmp{
   210  		{"utf8mb4_0900_as_cs", Kana1, Kana2},
   211  		{"utf8mb4_ja_0900_as_cs", Kana1, Kana2},
   212  		{"utf8mb4_ja_0900_as_cs_ks", Kana1, Kana2},
   213  	})
   214  }
   215  
// ExampleString is the sample input for TestCollationWithSpace. It mixes ASCII
// letters, non-ASCII Latin letters and Japanese characters, separated by
// spaces.
const ExampleString = "abc æøå 日本語"
   217  
   218  func TestCollationWithSpace(t *testing.T) {
   219  	conn := mysqlconn(t)
   220  	defer conn.Close()
   221  
   222  	codepoints := len([]rune(ExampleString))
   223  
   224  	for _, collName := range []string{"utf8mb4_0900_ai_ci", "utf8mb4_unicode_ci", "utf8mb4_unicode_520_ci"} {
   225  		t.Run(collName, func(t *testing.T) {
   226  			local := collationEnv.LookupByName(collName)
   227  			remote := remote.NewCollation(conn, collName)
   228  
   229  			for _, size := range []int{0, codepoints, codepoints + 1, codepoints + 2, 20, 32} {
   230  				localWeight := local.WeightString(nil, []byte(ExampleString), size)
   231  				remoteWeight := remote.WeightString(nil, []byte(ExampleString), size)
   232  				require.True(t, bytes.Equal(localWeight, remoteWeight), "mismatch at len=%d\ninput:    %#v\nexpected: %#v\nactual:   %#v", size, []byte(ExampleString), remoteWeight, localWeight)
   233  
   234  			}
   235  		})
   236  	}
   237  }