vitess.io/vitess@v0.16.2/go/vt/vtgate/vindexes/unicode_test.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vindexes
    18  
    19  import (
    20  	"strings"
    21  	"testing"
    22  	"time"
    23  )
    24  
    25  func TestNormalization(t *testing.T) {
    26  	tcases := []struct {
    27  		in, out string
    28  	}{{
    29  		in:  "Test",
    30  		out: "\x18\x16\x16L\x17\xf3\x18\x16",
    31  	}, {
    32  		in:  "TEST",
    33  		out: "\x18\x16\x16L\x17\xf3\x18\x16",
    34  	}, {
    35  		in:  "Te\u0301st",
    36  		out: "\x18\x16\x16L\x17\xf3\x18\x16",
    37  	}, {
    38  		in:  "Tést",
    39  		out: "\x18\x16\x16L\x17\xf3\x18\x16",
    40  	}, {
    41  		in:  "Bést",
    42  		out: "\x16\x05\x16L\x17\xf3\x18\x16",
    43  	}, {
    44  		in:  "Test ",
    45  		out: "\x18\x16\x16L\x17\xf3\x18\x16",
    46  	}, {
    47  		in:  " Test",
    48  		out: "\x01\t\x18\x16\x16L\x17\xf3\x18\x16",
    49  	}, {
    50  		in:  "Test\t",
    51  		out: "\x18\x16\x16L\x17\xf3\x18\x16\x01\x00",
    52  	}, {
    53  		in:  "TéstLooong",
    54  		out: "\x18\x16\x16L\x17\xf3\x18\x16\x17\x11\x17q\x17q\x17q\x17O\x16\x91",
    55  	}, {
    56  		in:  "T",
    57  		out: "\x18\x16",
    58  	}}
    59  	collator := newPooledCollator().(*pooledCollator)
    60  	for _, tcase := range tcases {
    61  		norm, err := normalize(collator.col, collator.buf, []byte(tcase.in))
    62  		if err != nil {
    63  			t.Errorf("normalize(%#v) error: %v", tcase.in, err)
    64  		}
    65  		out := string(norm)
    66  		if out != tcase.out {
    67  			t.Errorf("normalize(%#v): %#v, want %#v", tcase.in, out, tcase.out)
    68  		}
    69  	}
    70  }
    71  
    72  func TestInvalidUnicodeNormalization(t *testing.T) {
    73  	// These strings are known to contain invalid UTF-8.
    74  	inputs := []string{
    75  		"\x99\xeb\x9d\x18\xa4G\x84\x04]\x87\xf3\xc6|\xf2'F",
    76  		"D\x86\x15\xbb\xda\b1?j\x8e\xb6h\xd2\v\xf5\x05",
    77  		"\x8a[\xdf,\u007fĄE\x92\xd2W+\xcd\x06h\xd2",
    78  	}
    79  	wantErr := "invalid UTF-8"
    80  	collator := newPooledCollator().(*pooledCollator)
    81  
    82  	for _, in := range inputs {
    83  		// We've observed that infinite looping is a possible failure mode for the
    84  		// collator when given invalid UTF-8, so we detect that with a timer.
    85  		done := make(chan struct{})
    86  		go func() {
    87  			defer close(done)
    88  			_, err := normalize(collator.col, collator.buf, []byte(in))
    89  			if err == nil {
    90  				t.Errorf("normalize(%q) error = nil, expected error", in)
    91  			}
    92  			if !strings.Contains(err.Error(), wantErr) {
    93  				t.Errorf("normalize(%q) error = %q, want %q", in, err.Error(), wantErr)
    94  			}
    95  		}()
    96  		timer := time.NewTimer(100 * time.Millisecond)
    97  		select {
    98  		case <-done:
    99  			timer.Stop()
   100  		case <-timer.C:
   101  			t.Errorf("invalid input caused infinite loop: %q", in)
   102  		}
   103  	}
   104  }
   105  
   106  // BenchmarkNormalizeSafe is the naive case where we create a new collator
   107  // and buffer every time.
   108  func BenchmarkNormalizeSafe(b *testing.B) {
   109  	input := []byte("testing")
   110  
   111  	for i := 0; i < b.N; i++ {
   112  		collator := newPooledCollator().(*pooledCollator)
   113  		normalize(collator.col, collator.buf, input)
   114  	}
   115  }
   116  
   117  // BenchmarkNormalizeShared is the ideal case where the collator and buffer
   118  // are shared between iterations, assuming no concurrency.
   119  func BenchmarkNormalizeShared(b *testing.B) {
   120  	input := []byte("testing")
   121  	collator := newPooledCollator().(*pooledCollator)
   122  
   123  	for i := 0; i < b.N; i++ {
   124  		normalize(collator.col, collator.buf, input)
   125  	}
   126  }
   127  
   128  // BenchmarkNormalizePooled should get us close to the performance of
   129  // BenchmarkNormalizeShared, except that this way is safe for concurrent use.
   130  func BenchmarkNormalizePooled(b *testing.B) {
   131  	input := []byte("testing")
   132  
   133  	for i := 0; i < b.N; i++ {
   134  		collator := collatorPool.Get().(*pooledCollator)
   135  		normalize(collator.col, collator.buf, input)
   136  		collatorPool.Put(collator)
   137  	}
   138  }