github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/like_regexp.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import (
    15  	"bytes"
    16  	"regexp"
    17  
    18  	"github.com/pkg/errors"
    19  )
    20  
// likeRegexp is the parsed form of a 'like' pattern as produced by
// parseLikeRegexp.
type likeRegexp struct {
	// optimizable is true when the pattern starts with at least one
	// non-wildcard byte, i.e. when min is non-empty.
	optimizable bool
	// min holds the leading bytes of the pattern up to (not including) the
	// first wildcard character. It is a sub-slice of the pattern passed to
	// parseLikeRegexp, not a copy.
	min []byte
	// regexp is the compiled, fully anchored regular expression built from
	// the pattern by transformLikeStringToRegexp.
	regexp *regexp.Regexp
}
    26  
    27  func parseLikeRegexp(in []byte) (*likeRegexp, error) {
    28  	r, err := regexp.Compile(transformLikeStringToRegexp(in))
    29  	if err != nil {
    30  		return nil, errors.Wrap(err, "compile regex from 'like' string")
    31  	}
    32  
    33  	min, ok := optimizable(in)
    34  	return &likeRegexp{
    35  		regexp:      r,
    36  		min:         min,
    37  		optimizable: ok,
    38  	}, nil
    39  }
    40  
    41  func transformLikeStringToRegexp(in []byte) string {
    42  	in = []byte(regexp.QuoteMeta(string(in)))
    43  	in = bytes.ReplaceAll(in, []byte("\\?"), []byte("."))
    44  	in = bytes.ReplaceAll(in, []byte("\\*"), []byte(".*"))
    45  	return "^" + string(in) + "$"
    46  }
    47  
    48  func optimizable(in []byte) ([]byte, bool) {
    49  	maxCharsWithoutWildcard := 0
    50  	for _, char := range in {
    51  		if isWildcardCharacter(char) {
    52  			break
    53  		}
    54  		maxCharsWithoutWildcard++
    55  	}
    56  
    57  	return in[:maxCharsWithoutWildcard], maxCharsWithoutWildcard > 0
    58  }
    59  
    60  func isWildcardCharacter(in byte) bool {
    61  	return in == '?' || in == '*'
    62  }