github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/like_regexp_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import (
    15  	"fmt"
    16  	"testing"
    17  
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/stretchr/testify/require"
    20  )
    21  
    22  func TestLikeRegexp(t *testing.T) {
    23  	type test struct {
    24  		input         []byte
    25  		subject       []byte
    26  		shouldMatch   bool
    27  		expectedError error
    28  	}
    29  
    30  	run := func(t *testing.T, tests []test) {
    31  		for _, test := range tests {
    32  			t.Run(fmt.Sprintf("for input %q and subject %q", string(test.input),
    33  				string(test.subject)), func(t *testing.T) {
    34  				res, err := parseLikeRegexp(test.input)
    35  				if test.expectedError != nil {
    36  					assert.Equal(t, test.expectedError, err)
    37  					return
    38  				}
    39  
    40  				require.Nil(t, err)
    41  				assert.Equal(t, test.shouldMatch, res.regexp.Match(test.subject))
    42  			})
    43  		}
    44  	}
    45  
    46  	t.Run("without a wildcard", func(t *testing.T) {
    47  		input := []byte("car")
    48  		tests := []test{
    49  			{input: input, subject: []byte("car"), shouldMatch: true},
    50  			{input: input, subject: []byte("care"), shouldMatch: false},
    51  			{input: input, subject: []byte("supercar"), shouldMatch: false},
    52  		}
    53  
    54  		run(t, tests)
    55  	})
    56  
    57  	t.Run("with a single-character wildcard", func(t *testing.T) {
    58  		input := []byte("car?")
    59  		tests := []test{
    60  			{input: input, subject: []byte("car"), shouldMatch: false},
    61  			{input: input, subject: []byte("cap"), shouldMatch: false},
    62  			{input: input, subject: []byte("care"), shouldMatch: true},
    63  			{input: input, subject: []byte("supercar"), shouldMatch: false},
    64  			{input: input, subject: []byte("carer"), shouldMatch: false},
    65  		}
    66  
    67  		run(t, tests)
    68  	})
    69  
    70  	t.Run("with a multi-character wildcard", func(t *testing.T) {
    71  		input := []byte("car*")
    72  		tests := []test{
    73  			{input: input, subject: []byte("car"), shouldMatch: true},
    74  			{input: input, subject: []byte("cap"), shouldMatch: false},
    75  			{input: input, subject: []byte("care"), shouldMatch: true},
    76  			{input: input, subject: []byte("supercar"), shouldMatch: false},
    77  			{input: input, subject: []byte("carer"), shouldMatch: true},
    78  		}
    79  
    80  		run(t, tests)
    81  	})
    82  
    83  	t.Run("with several wildcards", func(t *testing.T) {
    84  		input := []byte("*c?r*")
    85  		tests := []test{
    86  			{input: input, subject: []byte("car"), shouldMatch: true},
    87  			{input: input, subject: []byte("cap"), shouldMatch: false},
    88  			{input: input, subject: []byte("care"), shouldMatch: true},
    89  			{input: input, subject: []byte("supercar"), shouldMatch: true},
    90  			{input: input, subject: []byte("carer"), shouldMatch: true},
    91  		}
    92  
    93  		run(t, tests)
    94  	})
    95  
    96  	t.Run("with special characters", func(t *testing.T) {
    97  		input := []byte("car)")
    98  		tests := []test{
    99  			{input: input, subject: []byte("car)"), shouldMatch: true},
   100  			{input: input, subject: []byte("car))"), shouldMatch: false},
   101  			{input: input, subject: []byte("care}}"), shouldMatch: false},
   102  			{input: input, subject: []byte("/s/up{e)rca\\r"), shouldMatch: false},
   103  		}
   104  
   105  		run(t, tests)
   106  	})
   107  
   108  	t.Run("with complex special characters", func(t *testing.T) {
   109  		input := []byte("this-/is(my complex).text!")
   110  		tests := []test{
   111  			{input: input, subject: []byte("this-/is(my complex).text!"), shouldMatch: true},
   112  			{input: input, subject: []byte("this-/is(my complex).text!))"), shouldMatch: false},
   113  			{input: input, subject: []byte("///this-/is(my complex).text!}}"), shouldMatch: false},
   114  		}
   115  
   116  		run(t, tests)
   117  	})
   118  
   119  	t.Run("with special characters and wildcard", func(t *testing.T) {
   120  		subject := []byte("I love this fast car) that is yellow")
   121  		tests := []test{
   122  			{input: []byte("*car)*"), subject: subject, shouldMatch: true},
   123  			{input: []byte("*car))*"), subject: subject, shouldMatch: false},
   124  			{input: []byte("*care}}*"), subject: subject, shouldMatch: false},
   125  			{input: []byte("*/s/up{e)rca\\r*"), subject: subject, shouldMatch: false},
   126  		}
   127  
   128  		run(t, tests)
   129  	})
   130  }
   131  
   132  func TestLikeRegexp_ForOptimizability(t *testing.T) {
   133  	type test struct {
   134  		input               []byte
   135  		shouldBeOptimizable bool
   136  		expectedMin         []byte
   137  	}
   138  
   139  	run := func(t *testing.T, tests []test) {
   140  		for _, test := range tests {
   141  			t.Run(fmt.Sprintf("for input %q", string(test.input)), func(t *testing.T) {
   142  				res, err := parseLikeRegexp(test.input)
   143  				require.Nil(t, err)
   144  				assert.Equal(t, test.shouldBeOptimizable, res.optimizable)
   145  				assert.Equal(t, test.expectedMin, res.min)
   146  			})
   147  		}
   148  	}
   149  
   150  	tests := []test{
   151  		{input: []byte("car"), shouldBeOptimizable: true, expectedMin: []byte("car")},
   152  		{input: []byte("car*"), shouldBeOptimizable: true, expectedMin: []byte("car")},
   153  		{input: []byte("car?"), shouldBeOptimizable: true, expectedMin: []byte("car")},
   154  		{input: []byte("c?r"), shouldBeOptimizable: true, expectedMin: []byte("c")},
   155  		{input: []byte("car*taker"), shouldBeOptimizable: true, expectedMin: []byte("car")},
   156  		{input: []byte("car?tak*?*er"), shouldBeOptimizable: true, expectedMin: []byte("car")},
   157  		{input: []byte("?car"), shouldBeOptimizable: false, expectedMin: []byte{}},
   158  		{input: []byte("*car"), shouldBeOptimizable: false, expectedMin: []byte{}},
   159  		{input: []byte("*ca}r"), shouldBeOptimizable: false, expectedMin: []byte{}},
   160  		{input: []byte("*car)"), shouldBeOptimizable: false, expectedMin: []byte{}},
   161  	}
   162  
   163  	run(t, tests)
   164  }