github.com/pingcap/tidb/parser@v0.0.0-20231013125129-93a834a6bf8d/digester_test.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package parser_test
    15  
    16  import (
    17  	"crypto/sha256"
    18  	"encoding/hex"
    19  	"fmt"
    20  	"testing"
    21  
    22  	"github.com/pingcap/tidb/parser"
    23  	"github.com/stretchr/testify/require"
    24  )
    25  
    26  func TestNormalize(t *testing.T) {
    27  	tests_for_generic_normalization_rules := []struct {
    28  		input  string
    29  		expect string
    30  	}{
    31  		// Generic normalization rules
    32  		{"select _utf8mb4'123'", "select (_charset) ?"},
    33  		{"SELECT 1", "select ?"},
    34  		{"select null", "select ?"},
    35  		{"select \\N", "select ?"},
    36  		{"SELECT `null`", "select `null`"},
    37  		{"select * from b where id = 1", "select * from `b` where `id` = ?"},
    38  		{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from `b` where `id` in ( ... )"},
    39  		{"select 1 from b where id in (1, a, 4)", "select ? from `b` where `id` in ( ? , `a` , ? )"},
    40  		{"select 1 from b order by 2", "select ? from `b` order by 2"},
    41  		{"select /*+ a hint */ 1", "select ?"},
    42  		{"select /* a hint */ 1", "select ?"},
    43  		{"select truncate(1, 2)", "select truncate ( ... )"},
    44  		{"select -1 + - 2 + b - c + 0.2 + (-2) from c where d in (1, -2, +3)", "select ? + ? + `b` - `c` + ? + ( ? ) from `c` where `d` in ( ... )"},
    45  		{"select * from t where a <= -1 and b < -2 and c = -3 and c > -4 and c >= -5 and e is 1", "select * from `t` where `a` <= ? and `b` < ? and `c` = ? and `c` > ? and `c` >= ? and `e` is ?"},
    46  		{"select count(a), b from t group by 2", "select count ( `a` ) , `b` from `t` group by 2"},
    47  		{"select count(a), b, c from t group by 2, 3", "select count ( `a` ) , `b` , `c` from `t` group by 2 , 3"},
    48  		{"select count(a), b, c from t group by (2, 3)", "select count ( `a` ) , `b` , `c` from `t` group by ( 2 , 3 )"},
    49  		{"select a, b from t order by 1, 2", "select `a` , `b` from `t` order by 1 , 2"},
    50  		{"select count(*) from t", "select count ( ? ) from `t`"},
    51  		{"select * from t Force Index(kk)", "select * from `t`"},
    52  		{"select * from t USE Index(kk)", "select * from `t`"},
    53  		{"select * from t Ignore Index(kk)", "select * from `t`"},
    54  		{"select * from t1 straight_join t2 on t1.id=t2.id", "select * from `t1` join `t2` on `t1` . `id` = `t2` . `id`"},
    55  		{"select * from `table`", "select * from `table`"},
    56  		{"select * from `30`", "select * from `30`"},
    57  		{"select * from `select`", "select * from `select`"},
    58  		{"select * from 🥳", "select * from `🥳`"},
    59  		// test syntax error, it will be checked by parser, but it should not make normalize dead loop.
    60  		{"select * from t ignore index(", "select * from `t` ignore index"},
    61  		{"select /*+ ", "select "},
    62  		{"select 1 / 2", "select ? / ?"},
    63  		{"select * from t where a = 40 limit ?, ?", "select * from `t` where `a` = ? limit ..."},
    64  		{"select * from t where a > ?", "select * from `t` where `a` > ?"},
    65  		{"select @a=b from t", "select @a = `b` from `t`"},
    66  		{"select * from `table", "select * from"},
    67  		{"Select * from t where (i, j) in ((1,1), (2,2))", "select * from `t` where ( `i` , `j` ) in ( ( ... ) )"},
    68  		{"insert into t values (1,1), (2,2)", "insert into `t` values ( ... )"},
    69  		{"insert into t values (1), (2)", "insert into `t` values ( ... )"},
    70  		{"insert into t values (1)", "insert into `t` values ( ? )"},
    71  	}
    72  	for _, test := range tests_for_generic_normalization_rules {
    73  		normalized := parser.Normalize(test.input)
    74  		digest := parser.DigestNormalized(normalized)
    75  		require.Equal(t, test.expect, normalized)
    76  
    77  		normalized2, digest2 := parser.NormalizeDigest(test.input)
    78  		require.Equal(t, normalized, normalized2)
    79  		require.Equalf(t, digest.String(), digest2.String(), "%+v", test)
    80  	}
    81  
    82  	tests_for_binding_specific_rules := []struct {
    83  		input  string
    84  		expect string
    85  	}{
    86  		// Binding specific rules
    87  		// IN (Lit) => IN ( ... ) #44298
    88  		{"select * from t where a in (1)", "select * from `t` where `a` in ( ... )"},
    89  		{"select * from t where (a, b) in ((1, 1))", "select * from `t` where ( `a` , `b` ) in ( ( ... ) )"},
    90  		{"select * from t where (a, b) in ((1, 1), (2, 2))", "select * from `t` where ( `a` , `b` ) in ( ( ... ) )"},
    91  		{"select * from t where a in(1, 2)", "select * from `t` where `a` in ( ... )"},
    92  		{"select * from t where a in(1, 2, 3)", "select * from `t` where `a` in ( ... )"},
    93  	}
    94  	for _, test := range tests_for_binding_specific_rules {
    95  		normalized := parser.NormalizeForBinding(test.input)
    96  		digest := parser.DigestNormalized(normalized)
    97  		require.Equal(t, test.expect, normalized)
    98  
    99  		normalized2, digest2 := parser.NormalizeDigestForBinding(test.input)
   100  		require.Equal(t, normalized, normalized2)
   101  		require.Equalf(t, digest.String(), digest2.String(), "%+v", test)
   102  	}
   103  }
   104  
   105  func TestNormalizeKeepHint(t *testing.T) {
   106  	tests := []struct {
   107  		input  string
   108  		expect string
   109  	}{
   110  		{"select _utf8mb4'123'", "select (_charset) ?"},
   111  		{"SELECT 1", "select ?"},
   112  		{"select null", "select ?"},
   113  		{"select \\N", "select ?"},
   114  		{"SELECT `null`", "select `null`"},
   115  		{"select * from b where id = 1", "select * from `b` where `id` = ?"},
   116  		{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from `b` where `id` in ( ... )"},
   117  		{"select 1 from b where id in (1, a, 4)", "select ? from `b` where `id` in ( ? , `a` , ? )"},
   118  		{"select 1 from b order by 2", "select ? from `b` order by 2"},
   119  		{"select /*+ a hint */ 1", "select /*+ a hint */ ?"},
   120  		{"select /* a hint */ 1", "select ?"},
   121  		{"select truncate(1, 2)", "select truncate ( ... )"},
   122  		{"select -1 + - 2 + b - c + 0.2 + (-2) from c where d in (1, -2, +3)", "select ? + ? + `b` - `c` + ? + ( ? ) from `c` where `d` in ( ... )"},
   123  		{"select * from t where a <= -1 and b < -2 and c = -3 and c > -4 and c >= -5 and e is 1", "select * from `t` where `a` <= ? and `b` < ? and `c` = ? and `c` > ? and `c` >= ? and `e` is ?"},
   124  		{"select count(a), b from t group by 2", "select count ( `a` ) , `b` from `t` group by 2"},
   125  		{"select count(a), b, c from t group by 2, 3", "select count ( `a` ) , `b` , `c` from `t` group by 2 , 3"},
   126  		{"select count(a), b, c from t group by (2, 3)", "select count ( `a` ) , `b` , `c` from `t` group by ( 2 , 3 )"},
   127  		{"select a, b from t order by 1, 2", "select `a` , `b` from `t` order by 1 , 2"},
   128  		{"select count(*) from t", "select count ( ? ) from `t`"},
   129  		{"select * from t Force Index(kk)", "select * from `t` force index ( `kk` )"},
   130  		{"select * from t USE Index(kk)", "select * from `t` use index ( `kk` )"},
   131  		{"select * from t Ignore Index(kk)", "select * from `t` ignore index ( `kk` )"},
   132  		{"select * from t1 straight_join t2 on t1.id=t2.id", "select * from `t1` straight_join `t2` on `t1` . `id` = `t2` . `id`"},
   133  		{"select * from `table`", "select * from `table`"},
   134  		{"select * from `30`", "select * from `30`"},
   135  		{"select * from `select`", "select * from `select`"},
   136  		{"select * from 🥳", "select * from `🥳`"},
   137  		// test syntax error, it will be checked by parser, but it should not make normalize dead loop.
   138  		{"select * from t ignore index(", "select * from `t` ignore index ("},
   139  		{"select /*+ ", "select "},
   140  		{"select 1 / 2", "select ? / ?"},
   141  		{"select * from t where a = 40 limit ?, ?", "select * from `t` where `a` = ? limit ..."},
   142  		{"select * from t where a > ?", "select * from `t` where `a` > ?"},
   143  		{"select @a=b from t", "select @a = `b` from `t`"},
   144  		{"select * from `table", "select * from"},
   145  	}
   146  	for _, test := range tests {
   147  		normalized := parser.NormalizeKeepHint(test.input)
   148  		require.Equal(t, test.expect, normalized)
   149  	}
   150  }
   151  
   152  func TestNormalizeDigest(t *testing.T) {
   153  	tests := []struct {
   154  		sql        string
   155  		normalized string
   156  		digest     string
   157  	}{
   158  		{"select 1 from b where id in (1, 3, '3', 1, 2, 3, 4)", "select ? from `b` where `id` in ( ... )", "e1c8cc2738f596dc24f15ef8eb55e0d902910d7298983496362a7b46dbc0b310"},
   159  	}
   160  	for _, test := range tests {
   161  		normalized, digest := parser.NormalizeDigest(test.sql)
   162  		require.Equal(t, test.normalized, normalized)
   163  		require.Equal(t, test.digest, digest.String())
   164  
   165  		normalized = parser.Normalize(test.sql)
   166  		digest = parser.DigestNormalized(normalized)
   167  		require.Equal(t, test.normalized, normalized)
   168  		require.Equal(t, test.digest, digest.String())
   169  	}
   170  }
   171  
   172  func TestDigestHashEqForSimpleSQL(t *testing.T) {
   173  	sqlGroups := [][]string{
   174  		{"select * from b where id = 1", "select * from b where id = '1'", "select * from b where id =2"},
   175  		{"select 2 from b, c where c.id > 1", "select 4 from b, c where c.id > 23"},
   176  		{"Select 3", "select 1"},
   177  		{"Select * from t where (i, j) in ((1,1), (2,2))", "select * from t where (i, j) in ((1,1), (2,2), (3,3))"},
   178  		{"insert into t values (1,1)", "insert into t values (1,1), (2,2)"},
   179  	}
   180  	for _, sqlGroup := range sqlGroups {
   181  		var d string
   182  		for _, sql := range sqlGroup {
   183  			dig := parser.DigestHash(sql)
   184  			if d == "" {
   185  				d = dig.String()
   186  				continue
   187  			}
   188  			require.Equal(t, dig.String(), d)
   189  		}
   190  	}
   191  }
   192  
   193  func TestDigestHashNotEqForSimpleSQL(t *testing.T) {
   194  	sqlGroups := [][]string{
   195  		{"select * from b where id = 1", "select a from b where id = 1", "select * from d where bid =1"},
   196  	}
   197  	for _, sqlGroup := range sqlGroups {
   198  		var d string
   199  		for _, sql := range sqlGroup {
   200  			dig := parser.DigestHash(sql)
   201  			if d == "" {
   202  				d = dig.String()
   203  				continue
   204  			}
   205  			require.NotEqual(t, dig.String(), d)
   206  		}
   207  	}
   208  }
   209  
   210  func TestGenDigest(t *testing.T) {
   211  	hash := genRandDigest("abc")
   212  	digest := parser.NewDigest(hash)
   213  	require.Equal(t, fmt.Sprintf("%x", hash), digest.String())
   214  	require.Equal(t, hash, digest.Bytes())
   215  	digest = parser.NewDigest(nil)
   216  	require.Equal(t, "", digest.String())
   217  	require.Nil(t, digest.Bytes())
   218  }
   219  
   220  func genRandDigest(str string) []byte {
   221  	hasher := sha256.New()
   222  	hasher.Write([]byte(str))
   223  	return hasher.Sum(nil)
   224  }
   225  
   226  func BenchmarkDigestHexEncode(b *testing.B) {
   227  	digest1 := genRandDigest("abc")
   228  	b.ResetTimer()
   229  	for i := 0; i < b.N; i++ {
   230  		hex.EncodeToString(digest1)
   231  	}
   232  }
   233  
   234  func BenchmarkDigestSprintf(b *testing.B) {
   235  	digest1 := genRandDigest("abc")
   236  	b.ResetTimer()
   237  	for i := 0; i < b.N; i++ {
   238  		fmt.Sprintf("%x", digest1)
   239  	}
   240  }