github.com/dolthub/go-mysql-server@v0.18.0/sql/expression/like.go (about)

     1  // Copyright 2020-2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package expression
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"regexp"
    21  	"strings"
    22  	"sync"
    23  	"unicode/utf8"
    24  
    25  	"github.com/dolthub/go-mysql-server/internal/regex"
    26  	"github.com/dolthub/go-mysql-server/sql"
    27  	"github.com/dolthub/go-mysql-server/sql/types"
    28  )
    29  
    30  func newDefaultLikeMatcher(likeStr string) (regex.DisposableMatcher, error) {
    31  	return regex.NewDisposableMatcher("go", likeStr)
    32  }
    33  
// Like performs pattern matching against two strings. The left child is the value being tested and
// the right child is the pattern.
type Like struct {
	BinaryExpressionStub
	// Escape is the optional ESCAPE expression. When it is nil, evalRight falls back to a backslash.
	Escape sql.Expression
	// pool hands out reusable matchers when cached is true. It is nil until the first Eval call.
	pool   *sync.Pool
	// once guards the one-time creation of pool inside Eval.
	once   sync.Once
	// cached is set by NewLike: true unless the pattern expression contains a GetField, in which case
	// the matcher is rebuilt on every Eval.
	cached bool
}

// Compile-time checks that *Like satisfies the interfaces it is expected to implement.
var _ sql.Expression = (*Like)(nil)
var _ sql.CollationCoercible = (*Like)(nil)
    45  
// likeMatcherErrTuple pairs a matcher with the error produced while building it, so that both can
// be stored in and retrieved from the sync.Pool used by Like.Eval as a single value.
type likeMatcherErrTuple struct {
	matcher LikeMatcher
	err     error
}
    50  
    51  // NewLike creates a new LIKE expression.
    52  func NewLike(left, right, escape sql.Expression) sql.Expression {
    53  	var cached = true
    54  	sql.Inspect(right, func(e sql.Expression) bool {
    55  		if _, ok := e.(*GetField); ok {
    56  			cached = false
    57  		}
    58  		return true
    59  	})
    60  
    61  	return &Like{
    62  		BinaryExpressionStub: BinaryExpressionStub{left, right},
    63  		Escape:               escape,
    64  		pool:                 nil,
    65  		once:                 sync.Once{},
    66  		cached:               cached,
    67  	}
    68  }
    69  
    70  // Type implements the sql.Expression interface.
    71  func (l *Like) Type() sql.Type { return types.Boolean }
    72  
    73  // CollationCoercibility implements the interface sql.CollationCoercible.
    74  func (l *Like) CollationCoercibility(ctx *sql.Context) (collation sql.CollationID, coercibility byte) {
    75  	leftCollation, leftCoercibility := sql.GetCoercibility(ctx, l.LeftChild)
    76  	rightCollation, rightCoercibility := sql.GetCoercibility(ctx, l.RightChild)
    77  	return sql.ResolveCoercibility(leftCollation, leftCoercibility, rightCollation, rightCoercibility)
    78  }
    79  
    80  // Eval implements the sql.Expression interface.
    81  func (l *Like) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
    82  	span, ctx := ctx.Span("expression.Like")
    83  	defer span.End()
    84  
    85  	left, err := l.LeftChild.Eval(ctx, row)
    86  	if err != nil || left == nil {
    87  		return nil, err
    88  	}
    89  	if _, ok := left.(string); !ok {
    90  		left, _, err = types.LongText.Convert(left)
    91  		if err != nil {
    92  			return nil, err
    93  		}
    94  	}
    95  
    96  	var lm LikeMatcher
    97  	if !l.cached {
    98  		// for non-cached regex every time create a new matcher
    99  		right, escape, rerr := l.evalRight(ctx, row)
   100  		if rerr != nil {
   101  			return nil, rerr
   102  		}
   103  		if right == nil {
   104  			return nil, nil
   105  		}
   106  		collation, _ := l.CollationCoercibility(ctx)
   107  		lm, err = ConstructLikeMatcher(collation, *right, escape)
   108  	} else {
   109  		l.once.Do(func() {
   110  			right, escape, err := l.evalRight(ctx, row)
   111  			l.pool = &sync.Pool{
   112  				New: func() interface{} {
   113  					if err != nil || right == nil {
   114  						return likeMatcherErrTuple{LikeMatcher{}, err}
   115  					}
   116  					collation, _ := l.CollationCoercibility(ctx)
   117  					m, e := ConstructLikeMatcher(collation, *right, escape)
   118  					return likeMatcherErrTuple{m, e}
   119  				},
   120  			}
   121  		})
   122  		tpl := l.pool.Get().(likeMatcherErrTuple)
   123  		lm, err = tpl.matcher, tpl.err
   124  	}
   125  	if err != nil {
   126  		return nil, err
   127  	}
   128  	if lm.collation == sql.Collation_Unspecified {
   129  		return false, nil
   130  	}
   131  
   132  	ok := lm.Match(left.(string))
   133  	if l.cached {
   134  		l.pool.Put(likeMatcherErrTuple{lm, nil})
   135  	}
   136  	return ok, nil
   137  }
   138  
   139  func (l *Like) evalRight(ctx *sql.Context, row sql.Row) (right *string, escape rune, err error) {
   140  	rightVal, err := l.RightChild.Eval(ctx, row)
   141  	if err != nil || rightVal == nil {
   142  		return nil, 0, err
   143  	}
   144  	if _, ok := rightVal.(string); !ok {
   145  		rightVal, _, err = types.LongText.Convert(rightVal)
   146  		if err != nil {
   147  			return nil, 0, err
   148  		}
   149  	}
   150  
   151  	var escapeVal interface{}
   152  	if l.Escape != nil {
   153  		escapeVal, err = l.Escape.Eval(ctx, row)
   154  		if err != nil {
   155  			return nil, 0, err
   156  		}
   157  		if escapeVal == nil {
   158  			escapeVal = `\`
   159  		}
   160  		if _, ok := escapeVal.(string); !ok {
   161  			escapeVal, _, err = types.LongText.Convert(escapeVal)
   162  			if err != nil {
   163  				return nil, 0, err
   164  			}
   165  		}
   166  		if utf8.RuneCountInString(escapeVal.(string)) > 1 {
   167  			return nil, 0, sql.ErrInvalidArgument.New("ESCAPE")
   168  		}
   169  	} else {
   170  		escapeVal = `\`
   171  	}
   172  
   173  	rightStr := rightVal.(string)
   174  	return &rightStr, []rune(escapeVal.(string))[0], nil
   175  }
   176  
   177  func (l *Like) String() string {
   178  	return fmt.Sprintf("%s LIKE %s", l.LeftChild, l.RightChild)
   179  }
   180  
   181  // WithChildren implements the Expression interface.
   182  func (l *Like) WithChildren(children ...sql.Expression) (sql.Expression, error) {
   183  	if len(children) != 2 {
   184  		return nil, sql.ErrInvalidChildrenNumber.New(l, len(children), 2)
   185  	}
   186  	return NewLike(children[0], children[1], l.Escape), nil
   187  }
   188  
   189  func patternToGoRegex(pattern string) string {
   190  	var buf bytes.Buffer
   191  	buf.WriteString("(?s)")
   192  	buf.WriteRune('^')
   193  	var escaped bool
   194  	for _, r := range strings.Replace(regexp.QuoteMeta(pattern), `\\`, `\`, -1) {
   195  		switch r {
   196  		case '_':
   197  			if escaped {
   198  				buf.WriteRune(r)
   199  			} else {
   200  				buf.WriteRune('.')
   201  			}
   202  		case '%':
   203  			if escaped {
   204  				buf.WriteRune(r)
   205  			} else {
   206  				buf.WriteString(".*")
   207  			}
   208  		case '\\':
   209  			if escaped {
   210  				buf.WriteString(`\\`)
   211  			} else {
   212  				escaped = true
   213  				continue
   214  			}
   215  		default:
   216  			if escaped {
   217  				buf.WriteString(`\`)
   218  			}
   219  			buf.WriteRune(r)
   220  		}
   221  
   222  		if escaped {
   223  			escaped = false
   224  		}
   225  	}
   226  
   227  	buf.WriteRune('$')
   228  	return buf.String()
   229  }
   230  
   231  func patternToGoRegexWithEscape(pattern, escape string) string {
   232  	var buf bytes.Buffer
   233  	buf.WriteString("(?s)")
   234  	buf.WriteRune('^')
   235  	var escaped bool
   236  
   237  	for _, r := range strings.Replace(strings.Replace(regexp.QuoteMeta(pattern), `\\`, `\`, -1), regexp.QuoteMeta(escape), escape, -1) {
   238  		switch r {
   239  		case rune(escape[0]):
   240  			if escaped {
   241  				buf.WriteString(regexp.QuoteMeta(escape))
   242  			} else {
   243  				escaped = true
   244  				continue
   245  			}
   246  		case '_':
   247  			if escaped {
   248  				buf.WriteRune(r)
   249  			} else {
   250  				buf.WriteRune('.')
   251  			}
   252  		case '%':
   253  			if escaped {
   254  				buf.WriteRune(r)
   255  			} else {
   256  				buf.WriteString(".*")
   257  			}
   258  		case '\\':
   259  			if escaped {
   260  				buf.WriteString(`\\`)
   261  			} else {
   262  				escaped = true
   263  				continue
   264  			}
   265  		default:
   266  			if escaped {
   267  				buf.WriteString(`\`)
   268  			}
   269  			buf.WriteRune(r)
   270  		}
   271  
   272  		if escaped {
   273  			escaped = false
   274  		}
   275  	}
   276  
   277  	buf.WriteRune('$')
   278  	return buf.String()
   279  }
   280  
// LikeMatcher is a collation-supported matcher for LIKE expressions.
type LikeMatcher struct {
	// nodes holds the compiled pattern, one node per pattern element, in pattern order.
	nodes     []likeMatcherNode
	// collation supplies the character set encoder and the sorter used while matching. Like.Eval
	// treats a matcher whose collation is sql.Collation_Unspecified as matching nothing.
	collation sql.CollationID
	// escape is the escape rune the pattern was compiled with; String uses it when rendering.
	escape    rune
}
   287  
   288  // ConstructLikeMatcher returns a new LikeMatcher.
   289  func ConstructLikeMatcher(collation sql.CollationID, pattern string, escape rune) (LikeMatcher, error) {
   290  	charsetEncoder := collation.CharacterSet().Encoder()
   291  	sorter := collation.Sorter()
   292  	matcher := LikeMatcher{nil, collation, escape}
   293  	for i := 0; i < len(pattern); {
   294  		nextRune, advance := charsetEncoder.NextRune(pattern[i:])
   295  		if nextRune == utf8.RuneError {
   296  			return LikeMatcher{}, sql.ErrCharSetInvalidString.New(collation.CharacterSet().Name(), pattern)
   297  		}
   298  		i += advance
   299  
   300  		switch nextRune {
   301  		case '_': // Matches any single character
   302  			matcher.nodes = append(matcher.nodes, likeMatcherRune{'_', -1})
   303  		case '%': // Matches any sequence of characters, including the empty sequence
   304  			matcher.nodes = append(matcher.nodes, likeMatcherAny{})
   305  		case escape: // States that the next character should be taken literally
   306  			nextRune, advance = charsetEncoder.NextRune(pattern[i:])
   307  			if nextRune == utf8.RuneError {
   308  				return LikeMatcher{}, sql.ErrCharSetInvalidString.New(collation.CharacterSet().Name(), pattern)
   309  			}
   310  			i += advance
   311  			matcher.nodes = append(matcher.nodes, likeMatcherRune{nextRune, sorter(nextRune)})
   312  		default: // A regular character that we'll match against
   313  			matcher.nodes = append(matcher.nodes, likeMatcherRune{nextRune, sorter(nextRune)})
   314  		}
   315  	}
   316  	return matcher, nil
   317  }
   318  
   319  // Match returns whether the given string conforms to the nodes contained in this matcher.
   320  func (l LikeMatcher) Match(s string) bool {
   321  	if len(l.nodes) == 0 {
   322  		if len(s) == 0 {
   323  			return true
   324  		}
   325  		return false
   326  	}
   327  
   328  	charsetEncoder := l.collation.CharacterSet().Encoder()
   329  	stringIndex := 0
   330  	nodeIndex := 0
   331  	nodeNextIndex := make([]int, 0, len(l.nodes))
   332  	for {
   333  		// If both indexes equal their lengths, we've fully matched the string with all nodes
   334  		if stringIndex == len(s) && nodeIndex == len(l.nodes) {
   335  			return true
   336  		}
   337  		// If all nodes have found a match but we still have runes left in the string, we backtrack to allow earlier
   338  		// nodes to match more runes. If we're unable to backtrack, then the string does not match.
   339  		if stringIndex < len(s) && nodeIndex == len(l.nodes) {
   340  			var matched bool
   341  			matched, nodeIndex = l.backtrack(s, nodeIndex-1, nodeNextIndex)
   342  			if !matched {
   343  				return false
   344  			}
   345  			nodeNextIndex = nodeNextIndex[:nodeIndex]
   346  			stringIndex = nodeNextIndex[nodeIndex-1]
   347  			continue
   348  		}
   349  		// If all runes have found a match but we still have nodes left in the matcher, we check if the remaining nodes
   350  		// are all "any sequence" nodes. If they're not, then the string is too short and does not match.
   351  		if stringIndex == len(s) && nodeIndex < len(l.nodes) {
   352  			for ; nodeIndex < len(l.nodes); nodeIndex++ {
   353  				if _, ok := l.nodes[nodeIndex].(likeMatcherAny); !ok {
   354  					return false
   355  				}
   356  			}
   357  			return true
   358  		}
   359  
   360  		nextRune, advance := charsetEncoder.NextRune(s[stringIndex:])
   361  		if nextRune == utf8.RuneError {
   362  			return false
   363  		}
   364  		matched, consumed := l.nodes[nodeIndex].Match(l.collation, nextRune)
   365  		if consumed {
   366  			stringIndex += advance
   367  		}
   368  		if matched {
   369  			nodeNextIndex = append(nodeNextIndex, stringIndex)
   370  			nodeIndex++
   371  		} else {
   372  			// If we didn't match on this rune, we backtrack to allow earlier nodes to match more runes
   373  			matched, nodeIndex = l.backtrack(s, nodeIndex, nodeNextIndex)
   374  			if !matched {
   375  				return false
   376  			}
   377  			nodeNextIndex = nodeNextIndex[:nodeIndex]
   378  			stringIndex = nodeNextIndex[nodeIndex-1]
   379  			continue
   380  		}
   381  	}
   382  	// Must return something here to compile, but the above loop will handle all return cases
   383  	return false
   384  }
   385  
   386  // String returns the string form of this LIKE expression. If an Escape character was provided, it is used instead of
   387  // the default.
   388  func (l LikeMatcher) String() string {
   389  	sb := strings.Builder{}
   390  	for _, node := range l.nodes {
   391  		switch node := node.(type) {
   392  		case likeMatcherRune:
   393  			if node.original == '%' {
   394  				sb.WriteRune(l.escape)
   395  				sb.WriteRune('%')
   396  			} else if node.original == '_' {
   397  				if node.sortOrder != -1 {
   398  					sb.WriteRune(l.escape)
   399  				}
   400  				sb.WriteRune('_')
   401  			} else {
   402  				sb.WriteRune(node.original)
   403  			}
   404  		case likeMatcherAny:
   405  			sb.WriteRune('%')
   406  		}
   407  	}
   408  	return sb.String()
   409  }
   410  
   411  // backtrack unwinds the stack until we can find a node that can match the next rune compared to the rune that it last
   412  // matched against. The returned node index is the index to use for the next match.
   413  func (l LikeMatcher) backtrack(s string, nodeIndex int, nodeNextIndex []int) (matched bool, newNodeIndex int) {
   414  	charsetEncoder := l.collation.CharacterSet().Encoder()
   415  	// If the slice doesn't contain an entry for the node, then that node was never matched (and therefore we can't
   416  	// backtrack over it).
   417  	if nodeIndex >= len(nodeNextIndex) {
   418  		nodeIndex = len(nodeNextIndex) - 1
   419  	}
   420  	for ; nodeIndex >= 0; nodeIndex-- {
   421  		stringIndex := nodeNextIndex[nodeIndex]
   422  		nextRune, advance := charsetEncoder.NextRune(s[stringIndex:])
   423  		if nextRune == utf8.RuneError {
   424  			return false, 0
   425  		}
   426  		if l.nodes[nodeIndex].MatchNext(l.collation, nextRune) {
   427  			nodeNextIndex[nodeIndex] = stringIndex + advance
   428  			return true, nodeIndex + 1
   429  		}
   430  	}
   431  	// We exhausted all nodes, no nodes may match further
   432  	return false, 0
   433  }
   434  
// likeMatcherNode handles the match characteristics for a particular character from the pattern.
// LikeMatcher.Match calls Match the first time a node is reached, and LikeMatcher.backtrack calls
// MatchNext when it tries to extend a node that has already matched.
type likeMatcherNode interface {
	// Match returns whether the given rune is matched on the initial match, and also whether this rune is consumed. If
	// not consumed, the same rune will be given to the next node. It is assumed that consuming a rune always matches
	// the rune.
	Match(collation sql.CollationID, r rune) (matched bool, consumed bool)
	// MatchNext returns whether the given rune is matched on a subsequent match. Only the first match may optionally
	// consume a rune, all subsequent matches will consume the rune.
	MatchNext(collation sql.CollationID, r rune) bool
}
   445  
// likeMatcherRune matches exactly one rune. If the sort order is negative, then this matches any rune (but still only
// a single rune).
type likeMatcherRune struct {
	// original is the rune as it appeared in the pattern ('_' for the single-character wildcard).
	original  rune
	// sortOrder is the value the collation's sorter returned for original when the pattern was
	// compiled; the single-character wildcard is stored with -1.
	sortOrder int32
}

// Compile-time check that likeMatcherRune implements likeMatcherNode.
var _ likeMatcherNode = likeMatcherRune{}
   454  
   455  // Match implements the interface likeMatcherNode.
   456  func (l likeMatcherRune) Match(collation sql.CollationID, r rune) (matched bool, consumed bool) {
   457  	if l.sortOrder < 0 || collation.Sorter()(r) == l.sortOrder {
   458  		return true, true
   459  	}
   460  	return false, false
   461  }
   462  
   463  // MatchNext implements the interface likeMatcherNode. As this only matches a single rune, all subsequent matches will
   464  // fail.
   465  func (l likeMatcherRune) MatchNext(collation sql.CollationID, r rune) bool {
   466  	return false
   467  }
   468  
// likeMatcherAny matches any sequence of characters, including the empty sequence. It is the node
// ConstructLikeMatcher emits for an unescaped '%' in the pattern and carries no state.
type likeMatcherAny struct{}

// Compile-time check that likeMatcherAny implements likeMatcherNode.
var _ likeMatcherNode = likeMatcherAny{}
   473  
   474  // Match implements the interface likeMatcherNode. This node is a reluctant matcher, meaning it attempts to match as few
   475  // runes as possible. As this will always match the empty sequence first, we'll return true on the match, but will not
   476  // consume the given rune.
   477  func (l likeMatcherAny) Match(collation sql.CollationID, r rune) (matched bool, consumed bool) {
   478  	return true, false
   479  }
   480  
   481  // MatchNext implements the interface likeMatcherNode.
   482  func (l likeMatcherAny) MatchNext(collation sql.CollationID, r rune) bool {
   483  	return true
   484  }