github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/function/func_builtin_regexp.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package function
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"regexp"
    21  	"unicode/utf8"
    22  
    23  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    24  
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/common/util"
    27  	"github.com/matrixorigin/matrixone/pkg/container/types"
    28  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    29  	"github.com/matrixorigin/matrixone/pkg/sql/plan/function/functionUtil"
    30  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    31  )
    32  
const (
	// DefaultEscapeChar is the escape character used when interpreting LIKE
	// patterns in this file.
	DefaultEscapeChar = '\\'

	// mapSizeForRegexp is both the initial capacity of a regexpSet cache and
	// the entry count at which getRegularMatcher starts evicting.
	mapSizeForRegexp = 100
)
    38  
// opBuiltInRegexp carries the state shared by the LIKE / ILIKE operators and
// the regexp_* built-in functions implemented as its methods.
type opBuiltInRegexp struct {
	// regMap caches compiled regular expressions, keyed by pattern text.
	regMap regexpSet
}
    42  
    43  func newOpBuiltInRegexp() *opBuiltInRegexp {
    44  	return &opBuiltInRegexp{
    45  		regMap: regexpSet{
    46  			mp: make(map[string]*regexp.Regexp, mapSizeForRegexp),
    47  		},
    48  	}
    49  }
    50  
    51  func (op *opBuiltInRegexp) likeFn(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error {
    52  	p1 := vector.GenerateFunctionStrParameter(parameters[0])
    53  	p2 := vector.GenerateFunctionStrParameter(parameters[1])
    54  	rs := vector.MustFunctionResult[bool](result)
    55  
    56  	// optimize rule for some special case.
    57  	if parameters[1].IsConst() {
    58  		canOptimize, err := optimizeRuleForLike(p1, p2, rs, length, func(i []byte) []byte {
    59  			return i
    60  		})
    61  		if canOptimize {
    62  			return err
    63  		}
    64  	}
    65  
    66  	return opBinaryBytesBytesToFixedWithErrorCheck[bool](parameters, result, proc, length, func(v1, v2 []byte) (bool, error) {
    67  		return op.regMap.regularMatchForLikeOp(v2, v1)
    68  	})
    69  }
    70  
    71  func (op *opBuiltInRegexp) iLikeFn(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error {
    72  	p1 := vector.GenerateFunctionStrParameter(parameters[0])
    73  	p2 := vector.GenerateFunctionStrParameter(parameters[1])
    74  	rs := vector.MustFunctionResult[bool](result)
    75  
    76  	// optimize rule for some special case.
    77  	if parameters[1].IsConst() {
    78  		canOptimize, err := optimizeRuleForLike(p1, p2, rs, length, func(i []byte) []byte {
    79  			return bytes.ToLower(i)
    80  		})
    81  		if canOptimize {
    82  			return err
    83  		}
    84  	}
    85  
    86  	return opBinaryBytesBytesToFixedWithErrorCheck[bool](parameters, result, proc, length, func(v1, v2 []byte) (bool, error) {
    87  		return op.regMap.regularMatchForLikeOp(bytes.ToLower(v2), bytes.ToLower(v1))
    88  	})
    89  }
    90  
    91  func optimizeRuleForLike(p1, p2 vector.FunctionParameterWrapper[types.Varlena], rs *vector.FunctionResult[bool], length int,
    92  	specialFnForV func([]byte) []byte) (bool, error) {
    93  	pat, null := p2.GetStrValue(0)
    94  	if null {
    95  		nulls.AddRange(rs.GetResultVector().GetNulls(), 0, uint64(length))
    96  		return true, nil
    97  	}
    98  	pat = specialFnForV(pat)
    99  
   100  	n := len(pat)
   101  	// opt rule #1: if expr is empty string, only empty string like empty string.
   102  	if n == 0 {
   103  		for i := uint64(0); i < uint64(length); i++ {
   104  			v1, null1 := p1.GetStrValue(i)
   105  			v1 = specialFnForV(v1)
   106  			if err := rs.Append(len(v1) == 0, null1); err != nil {
   107  				return true, err
   108  			}
   109  		}
   110  		return true, nil
   111  	}
   112  	// opt rule #2.1: anything matches %
   113  	if n == 1 && pat[0] == '%' {
   114  		for i := uint64(0); i < uint64(length); i++ {
   115  			_, null1 := p1.GetStrValue(i)
   116  			if err := rs.Append(true, null1); err != nil {
   117  				return true, err
   118  			}
   119  		}
   120  		return true, nil
   121  	}
   122  	// opt rule #2.2: single char matches _
   123  	// XXX in UTF8 world, should we do single RUNE matches _?
   124  	if n == 1 && pat[0] == '_' {
   125  		for i := uint64(0); i < uint64(length); i++ {
   126  			v1, null1 := p1.GetStrValue(i)
   127  			v1 = specialFnForV(v1)
   128  			if err := rs.Append(len(v1) == 1, null1); err != nil {
   129  				return true, err
   130  			}
   131  		}
   132  		return true, nil
   133  	}
   134  	// opt rule #2.3: single char, no wild card, so it is a simple compare eq.
   135  	if n == 1 && pat[0] != '_' && pat[0] != '%' {
   136  		for i := uint64(0); i < uint64(length); i++ {
   137  			v1, null1 := p1.GetStrValue(i)
   138  			v1 = specialFnForV(v1)
   139  			if err := rs.Append(len(v1) == 1 && v1[0] == pat[0], null1); err != nil {
   140  				return true, err
   141  			}
   142  		}
   143  		return true, nil
   144  	}
   145  
   146  	// opt rule #3: [_%]somethingInBetween[_%]
   147  	if n > 1 {
   148  		c0, c1 := pat[0], pat[n-1]
   149  		if !bytes.ContainsAny(pat[1:len(pat)-1], "_%") {
   150  			if n > 2 && pat[n-2] == DefaultEscapeChar {
   151  				c1 = DefaultEscapeChar
   152  			}
   153  			switch {
   154  			case !(c0 == '%' || c0 == '_') && !(c1 == '%' || c1 == '_'):
   155  				// Rule 4.1: no wild card, so it is a simple compare eq.
   156  				for i := uint64(0); i < uint64(length); i++ {
   157  					v1, null1 := p1.GetStrValue(i)
   158  					v1 = specialFnForV(v1)
   159  					if err := rs.Append(len(v1) == n && bytes.Equal(pat, v1), null1); err != nil {
   160  						return true, err
   161  					}
   162  				}
   163  				return true, nil
   164  
   165  			case c0 == '_' && !(c1 == '%' || c1 == '_'):
   166  				// Rule 4.2: _foobarzoo,
   167  				for i := uint64(0); i < uint64(length); i++ {
   168  					v1, null1 := p1.GetStrValue(i)
   169  					v1 = specialFnForV(v1)
   170  					if err := rs.Append(len(v1) == n && bytes.Equal(pat[1:], v1[1:]), null1); err != nil {
   171  						return true, err
   172  					}
   173  				}
   174  				return true, nil
   175  
   176  			case c0 == '%' && !(c1 == '%' || c1 == '_'):
   177  				// Rule 4.3, %foobarzoo, it turns into a suffix match.
   178  				suffix := functionUtil.RemoveEscapeChar(pat[1:], DefaultEscapeChar)
   179  				for i := uint64(0); i < uint64(length); i++ {
   180  					v1, null1 := p1.GetStrValue(i)
   181  					v1 = specialFnForV(v1)
   182  					if err := rs.Append(bytes.HasSuffix(v1, suffix), null1); err != nil {
   183  						return true, err
   184  					}
   185  				}
   186  				return true, nil
   187  
   188  			case c1 == '_' && !(c0 == '%' || c0 == '_'):
   189  				// Rule 4.4, foobarzoo_, it turns into eq ingoring last char.
   190  				prefix := functionUtil.RemoveEscapeChar(pat[:n-1], DefaultEscapeChar)
   191  				for i := uint64(0); i < uint64(length); i++ {
   192  					v1, null1 := p1.GetStrValue(i)
   193  					v1 = specialFnForV(v1)
   194  					if err := rs.Append(len(v1) == n && bytes.Equal(prefix, v1[:n-1]), null1); err != nil {
   195  						return true, err
   196  					}
   197  				}
   198  				return true, nil
   199  
   200  			case c1 == '%' && !(c0 == '%' || c0 == '_'):
   201  				// Rule 4.5 foobarzoo%, prefix match
   202  				prefix := functionUtil.RemoveEscapeChar(pat[:n-1], DefaultEscapeChar)
   203  				for i := uint64(0); i < uint64(length); i++ {
   204  					v1, null1 := p1.GetStrValue(i)
   205  					v1 = specialFnForV(v1)
   206  					if err := rs.Append(bytes.HasPrefix(v1, prefix), null1); err != nil {
   207  						return true, err
   208  					}
   209  				}
   210  				return true, nil
   211  
   212  			case c0 == '%' && c1 == '%':
   213  				// Rule 4.6 %foobarzoo%, now it is contains
   214  				substr := functionUtil.RemoveEscapeChar(pat[1:n-1], DefaultEscapeChar)
   215  				for i := uint64(0); i < uint64(length); i++ {
   216  					v1, null1 := p1.GetStrValue(i)
   217  					v1 = specialFnForV(v1)
   218  					if err := rs.Append(bytes.Contains(v1, substr), null1); err != nil {
   219  						return true, err
   220  					}
   221  				}
   222  				return true, nil
   223  
   224  			case c0 == '%' && c1 == '_':
   225  				// Rule 4.7 %foobarzoo_,
   226  				suffix := functionUtil.RemoveEscapeChar(pat[1:n-1], DefaultEscapeChar)
   227  				for i := uint64(0); i < uint64(length); i++ {
   228  					v1, null1 := p1.GetStrValue(i)
   229  					v1 = specialFnForV(v1)
   230  					if err := rs.Append(len(v1) > 0 && bytes.HasSuffix(v1[:len(v1)-1], suffix), null1); err != nil {
   231  						return true, err
   232  					}
   233  				}
   234  				return true, nil
   235  
   236  			case c0 == '_' && c1 == '%':
   237  				// Rule 4.8 _foobarzoo%
   238  				prefix := functionUtil.RemoveEscapeChar(pat[1:n-1], DefaultEscapeChar)
   239  				for i := uint64(0); i < uint64(length); i++ {
   240  					v1, null1 := p1.GetStrValue(i)
   241  					v1 = specialFnForV(v1)
   242  					if err := rs.Append(len(v1) > 0 && bytes.HasPrefix(v1[1:], prefix), null1); err != nil {
   243  						return true, err
   244  					}
   245  				}
   246  				return true, nil
   247  			}
   248  		} else if c0 == '%' && c1 == '%' && !bytes.Contains(pat[1:len(pat)-1], []byte{'_'}) && !bytes.Contains(pat, []byte{'\\', '%'}) {
   249  			pat0 := pat[1:]
   250  			var subpats [][]byte
   251  			for {
   252  				idx := bytes.IndexByte(pat0, '%')
   253  				if idx == -1 {
   254  					break
   255  				}
   256  				subpats = append(subpats, pat0[:idx])
   257  				pat0 = pat0[idx+1:]
   258  			}
   259  
   260  		outer:
   261  			for i := uint64(0); i < uint64(length); i++ {
   262  				v1, null1 := p1.GetStrValue(i)
   263  				if null1 {
   264  					rs.AppendMustNull()
   265  				} else {
   266  					for _, sp := range subpats {
   267  						idx := bytes.Index(v1, sp)
   268  						if idx == -1 {
   269  							rs.AppendMustValue(false)
   270  							continue outer
   271  						}
   272  						v1 = v1[idx+len(sp):]
   273  					}
   274  					rs.AppendMustValue(true)
   275  				}
   276  			}
   277  			return true, nil
   278  		}
   279  	}
   280  	return false, nil
   281  }
   282  
   283  func (op *opBuiltInRegexp) builtInRegMatch(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error {
   284  	return opBinaryStrStrToFixedWithErrorCheck[bool](parameters, result, proc, length, func(v1, v2 string) (bool, error) {
   285  		reg, err := op.regMap.getRegularMatcher(v2)
   286  		if err != nil {
   287  			return false, err
   288  		}
   289  		return reg.MatchString(v1), nil
   290  	})
   291  }
   292  
   293  func (op *opBuiltInRegexp) builtInNotRegMatch(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error {
   294  	return opBinaryStrStrToFixedWithErrorCheck[bool](parameters, result, proc, length, func(v1, v2 string) (bool, error) {
   295  		reg, err := op.regMap.getRegularMatcher(v2)
   296  		if err != nil {
   297  			return false, err
   298  		}
   299  		return !reg.MatchString(v1), nil
   300  	})
   301  }
   302  
   303  func (op *opBuiltInRegexp) builtInRegexpSubstr(parameters []*vector.Vector, result vector.FunctionResultWrapper, _ *process.Process, length int) error {
   304  	p1 := vector.GenerateFunctionStrParameter(parameters[0])
   305  	p2 := vector.GenerateFunctionStrParameter(parameters[1])
   306  
   307  	rs := vector.MustFunctionResult[types.Varlena](result)
   308  	switch len(parameters) {
   309  	case 2:
   310  		for i := uint64(0); i < uint64(length); i++ {
   311  			v1, null1 := p1.GetStrValue(i)
   312  			v2, null2 := p2.GetStrValue(i)
   313  			if null1 || null2 || len(v2) == 0 {
   314  				if err := rs.AppendBytes(nil, true); err != nil {
   315  					return err
   316  				}
   317  			} else {
   318  				expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2)
   319  				match, res, err := op.regMap.regularSubstr(pat, expr, 1, 1)
   320  				if err != nil {
   321  					return err
   322  				}
   323  				if err = rs.AppendBytes(functionUtil.QuickStrToBytes(res), !match); err != nil {
   324  					return err
   325  				}
   326  			}
   327  		}
   328  
   329  	case 3:
   330  		positions := vector.GenerateFunctionFixedTypeParameter[int64](parameters[2])
   331  		for i := uint64(0); i < uint64(length); i++ {
   332  			v1, null1 := p1.GetStrValue(i)
   333  			v2, null2 := p2.GetStrValue(i)
   334  			pos, null3 := positions.GetValue(i)
   335  			if null1 || null2 || null3 || len(v2) == 0 {
   336  				if err := rs.AppendBytes(nil, true); err != nil {
   337  					return err
   338  				}
   339  			} else {
   340  				expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2)
   341  				match, res, err := op.regMap.regularSubstr(pat, expr, pos, 1)
   342  				if err != nil {
   343  					return err
   344  				}
   345  				if err = rs.AppendBytes(functionUtil.QuickStrToBytes(res), !match); err != nil {
   346  					return err
   347  				}
   348  			}
   349  		}
   350  
   351  	case 4:
   352  		positions := vector.GenerateFunctionFixedTypeParameter[int64](parameters[2])
   353  		occurrences := vector.GenerateFunctionFixedTypeParameter[int64](parameters[3])
   354  		for i := uint64(0); i < uint64(length); i++ {
   355  			v1, null1 := p1.GetStrValue(i)
   356  			v2, null2 := p2.GetStrValue(i)
   357  			pos, null3 := positions.GetValue(i)
   358  			ocur, null4 := occurrences.GetValue(i)
   359  			if null1 || null2 || null3 || null4 || len(v2) == 0 {
   360  				if err := rs.AppendBytes(nil, true); err != nil {
   361  					return err
   362  				}
   363  			} else {
   364  				expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2)
   365  				match, res, err := op.regMap.regularSubstr(pat, expr, pos, ocur)
   366  				if err != nil {
   367  					return err
   368  				}
   369  				if err = rs.AppendBytes(functionUtil.QuickStrToBytes(res), !match); err != nil {
   370  					return err
   371  				}
   372  			}
   373  		}
   374  		return nil
   375  
   376  	}
   377  	return nil
   378  }
   379  
   380  func (op *opBuiltInRegexp) builtInRegexpInstr(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error {
   381  	p1 := vector.GenerateFunctionStrParameter(parameters[0])
   382  	p2 := vector.GenerateFunctionStrParameter(parameters[1])
   383  
   384  	rs := vector.MustFunctionResult[int64](result)
   385  	switch len(parameters) {
   386  	case 2:
   387  		return opBinaryStrStrToFixedWithErrorCheck[int64](parameters, result, proc, length, func(v1, v2 string) (int64, error) {
   388  			return op.regMap.regularInstr(v2, v1, 1, 1, 0)
   389  		})
   390  
   391  	case 3:
   392  		positions := vector.GenerateFunctionFixedTypeParameter[int64](parameters[2])
   393  		for i := uint64(0); i < uint64(length); i++ {
   394  			v1, null1 := p1.GetStrValue(i)
   395  			v2, null2 := p2.GetStrValue(i)
   396  			pos, null3 := positions.GetValue(i)
   397  			if null1 || null2 || null3 {
   398  				if err := rs.Append(0, true); err != nil {
   399  					return err
   400  				}
   401  			} else {
   402  				expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2)
   403  				index, err := op.regMap.regularInstr(pat, expr, pos, 1, 0)
   404  				if err != nil {
   405  					return err
   406  				}
   407  				if err = rs.Append(index, false); err != nil {
   408  					return err
   409  				}
   410  			}
   411  		}
   412  
   413  	case 4:
   414  		positions := vector.GenerateFunctionFixedTypeParameter[int64](parameters[2])
   415  		occurrences := vector.GenerateFunctionFixedTypeParameter[int64](parameters[3])
   416  		for i := uint64(0); i < uint64(length); i++ {
   417  			v1, null1 := p1.GetStrValue(i)
   418  			v2, null2 := p2.GetStrValue(i)
   419  			pos, null3 := positions.GetValue(i)
   420  			ocur, null4 := occurrences.GetValue(i)
   421  			if null1 || null2 || null3 || null4 {
   422  				if err := rs.Append(0, true); err != nil {
   423  					return err
   424  				}
   425  			} else {
   426  				expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2)
   427  				index, err := op.regMap.regularInstr(pat, expr, pos, ocur, 0)
   428  				if err != nil {
   429  					return err
   430  				}
   431  				if err = rs.Append(index, false); err != nil {
   432  					return err
   433  				}
   434  			}
   435  		}
   436  		return nil
   437  
   438  	case 5:
   439  		positions := vector.GenerateFunctionFixedTypeParameter[int64](parameters[2])
   440  		occurrences := vector.GenerateFunctionFixedTypeParameter[int64](parameters[3])
   441  		resultOption := vector.GenerateFunctionFixedTypeParameter[int8](parameters[4])
   442  		for i := uint64(0); i < uint64(length); i++ {
   443  			v1, null1 := p1.GetStrValue(i)
   444  			v2, null2 := p2.GetStrValue(i)
   445  			pos, null3 := positions.GetValue(i)
   446  			ocur, null4 := occurrences.GetValue(i)
   447  			resOp, null5 := resultOption.GetValue(i)
   448  			if null1 || null2 || null3 || null4 || null5 {
   449  				if err := rs.Append(0, true); err != nil {
   450  					return err
   451  				}
   452  			} else {
   453  				expr, pat := functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v2)
   454  				index, err := op.regMap.regularInstr(pat, expr, pos, ocur, resOp)
   455  				if err != nil {
   456  					return err
   457  				}
   458  				if err = rs.Append(index, false); err != nil {
   459  					return err
   460  				}
   461  			}
   462  		}
   463  	}
   464  	return nil
   465  }
   466  
   467  func (op *opBuiltInRegexp) builtInRegexpLike(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int) error {
   468  	p1 := vector.GenerateFunctionStrParameter(parameters[0])
   469  	p2 := vector.GenerateFunctionStrParameter(parameters[1])
   470  	rs := vector.MustFunctionResult[bool](result)
   471  
   472  	if len(parameters) == 2 {
   473  		return opBinaryStrStrToFixedWithErrorCheck[bool](parameters, result, proc, length, func(v1, v2 string) (bool, error) {
   474  			match, err := op.regMap.regularLike(v2, v1, "c")
   475  			return match, err
   476  		})
   477  	} else if len(parameters) == 3 {
   478  		if parameters[2].IsConstNull() {
   479  			nulls.AddRange(rs.GetResultVector().GetNulls(), 0, uint64(length))
   480  			return nil
   481  		}
   482  
   483  		p3 := vector.GenerateFunctionStrParameter(parameters[2])
   484  		for i := uint64(0); i < uint64(length); i++ {
   485  			expr, null1 := p1.GetStrValue(i)
   486  			pat, null2 := p2.GetStrValue(i)
   487  			mt, null3 := p3.GetStrValue(i)
   488  			if null1 || null2 || null3 {
   489  				if err := rs.Append(false, true); err != nil {
   490  					return err
   491  				}
   492  			} else {
   493  				match, err := op.regMap.regularLike(string(pat), string(expr), string(mt))
   494  				if err != nil {
   495  					return err
   496  				}
   497  				if err = rs.Append(match, false); err != nil {
   498  					return err
   499  				}
   500  			}
   501  		}
   502  	}
   503  	return nil
   504  }
   505  
   506  func (op *opBuiltInRegexp) builtInRegexpReplace(parameters []*vector.Vector, result vector.FunctionResultWrapper, _ *process.Process, length int) error {
   507  	p1 := vector.GenerateFunctionStrParameter(parameters[0]) // expr
   508  	p2 := vector.GenerateFunctionStrParameter(parameters[1]) // pat
   509  	p3 := vector.GenerateFunctionStrParameter(parameters[2]) // repl
   510  	rs := vector.MustFunctionResult[types.Varlena](result)
   511  
   512  	if parameters[0].IsConstNull() || parameters[1].IsConstNull() || parameters[2].IsConstNull() {
   513  		for i := uint64(0); i < uint64(length); i++ {
   514  			if err := rs.AppendBytes(nil, true); err != nil {
   515  				return err
   516  			}
   517  		}
   518  		return nil
   519  	}
   520  
   521  	switch len(parameters) {
   522  	case 3:
   523  		for i := uint64(0); i < uint64(length); i++ {
   524  			v1, null1 := p1.GetStrValue(i)
   525  			v2, null2 := p2.GetStrValue(i)
   526  			v3, null3 := p3.GetStrValue(i)
   527  			if null1 || null2 || null3 {
   528  				if err := rs.AppendBytes(nil, true); err != nil {
   529  					return err
   530  				}
   531  			} else {
   532  				val, err := op.regMap.regularReplace(functionUtil.QuickBytesToStr(v2), functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v3), 1, 0)
   533  				if err != nil {
   534  					return err
   535  				}
   536  				if err = rs.AppendBytes([]byte(val), false); err != nil {
   537  					return err
   538  				}
   539  			}
   540  		}
   541  
   542  	case 4:
   543  		p4 := vector.GenerateFunctionFixedTypeParameter[int64](parameters[3])
   544  		for i := uint64(0); i < uint64(length); i++ {
   545  			v1, null1 := p1.GetStrValue(i)
   546  			v2, null2 := p2.GetStrValue(i)
   547  			v3, null3 := p3.GetStrValue(i)
   548  			v4, null4 := p4.GetValue(i)
   549  			if null1 || null2 || null3 || null4 {
   550  				if err := rs.AppendBytes(nil, true); err != nil {
   551  					return err
   552  				}
   553  			} else {
   554  				val, err := op.regMap.regularReplace(functionUtil.QuickBytesToStr(v2), functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v3), v4, 0)
   555  				if err != nil {
   556  					return err
   557  				}
   558  				if err = rs.AppendBytes([]byte(val), false); err != nil {
   559  					return err
   560  				}
   561  			}
   562  		}
   563  
   564  	case 5:
   565  		p4 := vector.GenerateFunctionFixedTypeParameter[int64](parameters[3])
   566  		p5 := vector.GenerateFunctionFixedTypeParameter[int64](parameters[4])
   567  		for i := uint64(0); i < uint64(length); i++ {
   568  			v1, null1 := p1.GetStrValue(i)
   569  			v2, null2 := p2.GetStrValue(i)
   570  			v3, null3 := p3.GetStrValue(i)
   571  			v4, null4 := p4.GetValue(i)
   572  			v5, null5 := p5.GetValue(i)
   573  			if null1 || null2 || null3 || null4 || null5 {
   574  				if err := rs.AppendBytes(nil, true); err != nil {
   575  					return err
   576  				}
   577  			} else {
   578  				val, err := op.regMap.regularReplace(functionUtil.QuickBytesToStr(v2), functionUtil.QuickBytesToStr(v1), functionUtil.QuickBytesToStr(v3), v4, v5)
   579  				if err != nil {
   580  					return err
   581  				}
   582  				if err = rs.AppendBytes([]byte(val), false); err != nil {
   583  					return err
   584  				}
   585  			}
   586  		}
   587  	}
   588  	return nil
   589  }
   590  
// regexpSet is a small cache of compiled regular expressions.
type regexpSet struct {
	// mp maps a pattern's source text to its compiled form.
	mp map[string]*regexp.Regexp
}
   594  
   595  func (rs *regexpSet) getRegularMatcher(pat string) (*regexp.Regexp, error) {
   596  	var err error
   597  
   598  	reg, ok := rs.mp[pat]
   599  	if !ok {
   600  		if len(rs.mp) == mapSizeForRegexp {
   601  			for key := range rs.mp {
   602  				delete(rs.mp, key)
   603  				break
   604  			}
   605  		}
   606  
   607  		reg, err = regexp.Compile(pat)
   608  		if err != nil {
   609  			return nil, err
   610  		}
   611  		rs.mp[pat] = reg
   612  	}
   613  	return reg, nil
   614  }
   615  
   616  func (rs *regexpSet) regularMatchForLikeOp(pat []byte, str []byte) (match bool, err error) {
   617  	replace := func(s string) string {
   618  		var oldCharactor rune
   619  
   620  		r := make([]byte, len(s)*2)
   621  		w := 0
   622  		start := 0
   623  		for len(s) > start {
   624  			character, wid := utf8.DecodeRuneInString(s[start:])
   625  			if oldCharactor == '\\' {
   626  				w += copy(r[w:], s[start:start+wid])
   627  				start += wid
   628  				oldCharactor = 0
   629  				continue
   630  			}
   631  			switch character {
   632  			case '_':
   633  				w += copy(r[w:], []byte{'.'})
   634  			case '%':
   635  				w += copy(r[w:], []byte{'.', '*'})
   636  			case '(':
   637  				w += copy(r[w:], []byte{'\\', '('})
   638  			case ')':
   639  				w += copy(r[w:], []byte{'\\', ')'})
   640  			case '\\':
   641  			default:
   642  				w += copy(r[w:], s[start:start+wid])
   643  			}
   644  			start += wid
   645  			oldCharactor = character
   646  		}
   647  		return string(r[:w])
   648  	}
   649  	convert := func(expr []byte) string {
   650  		return fmt.Sprintf("^(?s:%s)$", replace(util.UnsafeBytesToString(expr)))
   651  	}
   652  
   653  	realPat := convert(pat)
   654  	reg, err := rs.getRegularMatcher(realPat)
   655  	if err != nil {
   656  		return false, nil
   657  	}
   658  	return reg.Match(str), nil
   659  }
   660  
   661  // if str[pos:] matched pat.
   662  // return Nth (N = occurrence here) of match result
   663  func (rs *regexpSet) regularSubstr(pat string, str string, pos, occurrence int64) (match bool, substr string, err error) {
   664  	// check position
   665  	if pos < 1 || pos > int64(len(str)) {
   666  		return false, "", moerr.NewInvalidInputNoCtx("regexp_substr: Index out of bounds in regular expression search. Search start position: %d, Search string length: %d", pos, len(str))
   667  	}
   668  	// check occurrence
   669  	if occurrence < 1 {
   670  		return false, "", moerr.NewInvalidInputNoCtx("regexp_substr have Index out of bounds in regular expression search, return occurrence %d", occurrence)
   671  	}
   672  	reg, err := rs.getRegularMatcher(pat)
   673  	if err != nil {
   674  		return false, "", err
   675  	}
   676  
   677  	// match and return
   678  	matches := reg.FindAllString(str[pos-1:], -1)
   679  	if l := int64(len(matches)); l < occurrence {
   680  		return false, "", nil
   681  	}
   682  	return true, matches[occurrence-1], nil
   683  }
   684  
   685  func (rs *regexpSet) regularReplace(pat string, str string, repl string, pos, occurrence int64) (r string, err error) {
   686  	// check position
   687  	if pos < 1 || pos > int64(len(str)) {
   688  		return "", moerr.NewInvalidInputNoCtx("regexp_replace: Index out of bounds in regular expression search. Search start position: %d, Search string length: %d", pos, len(str))
   689  	}
   690  	// check occurrence
   691  	if occurrence < 0 {
   692  		return "", moerr.NewInvalidInputNoCtx("regexp_replace have Index out of bounds in regular expression search, return occurrence %d", occurrence)
   693  	}
   694  
   695  	reg, err := rs.getRegularMatcher(pat)
   696  	if err != nil {
   697  		pat = "[" + pat + "]"
   698  		return "", moerr.NewInvalidArgNoCtx("regexp_replace have invalid regexp pattern arg", pat)
   699  	}
   700  
   701  	//match result indexs
   702  	matchRes := reg.FindAllStringIndex(str, -1)
   703  	if matchRes == nil {
   704  		return str, nil
   705  	} //find the match position
   706  	index := 0
   707  	for int64(matchRes[index][0]) < pos-1 {
   708  		index++
   709  		if index == len(matchRes) {
   710  			return str, nil
   711  		}
   712  	}
   713  	matchRes = matchRes[index:]
   714  	if int64(len(matchRes)) < occurrence {
   715  		return str, nil
   716  	}
   717  	if occurrence == 0 {
   718  		return reg.ReplaceAllLiteralString(str, repl), nil
   719  	} else if occurrence == int64(len(matchRes)) {
   720  		// the string won't be replaced
   721  		notRepl := str[:matchRes[occurrence-1][0]]
   722  		// the string will be replaced
   723  		replace := str[matchRes[occurrence-1][0]:]
   724  		return notRepl + reg.ReplaceAllLiteralString(replace, repl), nil
   725  	} else {
   726  		// the string won't be replaced
   727  		notRepl := str[:matchRes[occurrence-1][0]]
   728  		// the string will be replaced
   729  		replace := str[matchRes[occurrence-1][0]:matchRes[occurrence][0]]
   730  		left := str[matchRes[occurrence][0]:]
   731  		return notRepl + reg.ReplaceAllLiteralString(replace, repl) + left, nil
   732  	}
   733  }
   734  
   735  // regularInstr return an index indicating the starting or ending position of the match.
   736  // it depends on the value of retOption, if 0 then return start, if 1 then return end.
   737  // return 0 if match failed.
   738  func (rs *regexpSet) regularInstr(pat string, str string, pos, occurrence int64, retOption int8) (index int64, err error) {
   739  	// check position
   740  	if pos < 1 || pos > int64(len(str)) {
   741  		return 0, moerr.NewInvalidInputNoCtx("regexp_instr: Index out of bounds in regular expression search. Search start position: %d, Search string length: %d", pos, len(str))
   742  	}
   743  	// check occurrence
   744  	if occurrence < 1 {
   745  		return 0, moerr.NewInvalidInputNoCtx("regexp_instr have Index out of bounds in regular expression search, return occurrence %d", occurrence)
   746  	}
   747  	// check retOption
   748  	if retOption > 1 {
   749  		return 0, moerr.NewInvalidInputNoCtx("regexp_instr have Index out of bounds in regular expression search, return option %d", retOption)
   750  	}
   751  
   752  	reg, err := rs.getRegularMatcher(pat)
   753  	if err != nil {
   754  		pat = "[" + pat + "]"
   755  		return 0, moerr.NewInvalidArgNoCtx("regexp_instr have invalid regexp pattern arg", pat)
   756  	}
   757  
   758  	matches := reg.FindAllStringIndex(str[pos-1:], -1)
   759  	if int64(len(matches)) < occurrence {
   760  		return 0, nil
   761  	}
   762  	return int64(matches[occurrence-1][retOption]) + pos, nil
   763  }
   764  
   765  func (rs *regexpSet) regularLike(pat string, str string, matchType string) (bool, error) {
   766  	mt, err := getPureMatchType(matchType)
   767  	if err != nil {
   768  		return false, err
   769  	}
   770  	rule := fmt.Sprintf("(?%s)%s", mt, pat)
   771  
   772  	reg, err := rs.getRegularMatcher(rule)
   773  	if err != nil {
   774  		return false, err
   775  	}
   776  
   777  	match := reg.MatchString(str)
   778  	return match, nil
   779  }
   780  
   781  // Support four arguments:
   782  // i: case insensitive.
   783  // c: case sensitive.
   784  // m: multiple line mode.
   785  // n: '.' can match line terminator.
   786  func getPureMatchType(input string) (string, error) {
   787  	retstring := ""
   788  	caseType := ""
   789  	foundn := false
   790  	foundm := false
   791  
   792  	for _, c := range input {
   793  		switch string(c) {
   794  		case "i":
   795  			caseType = "i"
   796  		case "c":
   797  			caseType = ""
   798  		case "m":
   799  			if !foundm {
   800  				retstring += "m"
   801  				foundm = true
   802  			}
   803  		case "n":
   804  			if !foundn {
   805  				retstring += "s"
   806  				foundn = true
   807  			}
   808  		default:
   809  			return "", moerr.NewInvalidInputNoCtx("regexp_like got invalid match_type input!")
   810  		}
   811  	}
   812  
   813  	retstring += caseType
   814  
   815  	return retstring, nil
   816  }