github.com/matrixorigin/matrixone@v0.7.0/pkg/vectorize/regular/regular_replace.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package regular
    16  
    17  import (
    18  	"regexp"
    19  	"strings"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    22  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    23  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    24  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    25  )
    26  
    27  func RegularReplace(expr, pat, repl string, pos, occurrence int64, match_type string) (string, error) {
    28  	if pos < 1 || occurrence < 0 || pos >= int64(len(expr)) {
    29  		return expr, moerr.NewInvalidInputNoCtx("regexp_replace have invalid input")
    30  	}
    31  	//regular expression pattern
    32  	reg, err := regexp.Compile(pat)
    33  	if err != nil {
    34  		return "", moerr.NewInvalidArgNoCtx("regexp_replace have invalid regexp pattern arg", pat)
    35  	}
    36  	//match result indexs
    37  	matchRes := reg.FindAllStringIndex(expr, -1)
    38  	if matchRes == nil {
    39  		return expr, nil
    40  	} //find the match position
    41  	index := 0
    42  	for int64(matchRes[index][0]) < pos-1 {
    43  		index++
    44  		if index == len(matchRes) {
    45  			return expr, nil
    46  		}
    47  	}
    48  	matchRes = matchRes[index:]
    49  	if int64(len(matchRes)) < occurrence {
    50  		return expr, nil
    51  	}
    52  	if occurrence == 0 {
    53  		return reg.ReplaceAllLiteralString(expr, repl), nil
    54  	} else if occurrence == int64(len(matchRes)) {
    55  		// the string won't be replaced
    56  		notRepl := expr[:matchRes[occurrence-1][0]]
    57  		// the string will be replaced
    58  		replace := expr[matchRes[occurrence-1][0]:]
    59  		return notRepl + reg.ReplaceAllLiteralString(replace, repl), nil
    60  	} else {
    61  		// the string won't be replaced
    62  		notRepl := expr[:matchRes[occurrence-1][0]]
    63  		// the string will be replaced
    64  		replace := expr[matchRes[occurrence-1][0]:matchRes[occurrence][0]]
    65  		left := expr[matchRes[occurrence][0]:]
    66  		return notRepl + reg.ReplaceAllLiteralString(replace, repl) + left, nil
    67  	}
    68  }
    69  
    70  func RegularReplaceWithReg(expr string, pat *regexp.Regexp, repl string, pos, occurrence int64, match_type string) (string, error) {
    71  	if pos < 1 || occurrence < 0 || pos >= int64(len(expr)) {
    72  		return expr, moerr.NewInvalidInputNoCtx("regexp_replace have invalid input")
    73  	}
    74  	//match result indexs
    75  	matchRes := pat.FindAllStringIndex(expr, -1)
    76  	if matchRes == nil {
    77  		return expr, nil
    78  	} //find the match position
    79  	index := 0
    80  	for int64(matchRes[index][0]) < pos-1 {
    81  		index++
    82  		if index == len(matchRes) {
    83  			return expr, nil
    84  		}
    85  	}
    86  	matchRes = matchRes[index:]
    87  	if int64(len(matchRes)) < occurrence {
    88  		return expr, nil
    89  	}
    90  
    91  	if occurrence == 0 {
    92  		return pat.ReplaceAllLiteralString(expr, repl), nil
    93  	} else if occurrence == int64(len(matchRes)) {
    94  		// the string won't be replaced
    95  		notRepl := expr[:matchRes[occurrence-1][0]]
    96  		// the string will be replaced
    97  		replace := expr[matchRes[occurrence-1][0]:]
    98  		return notRepl + pat.ReplaceAllLiteralString(replace, repl), nil
    99  	} else {
   100  		// the string won't be replaced
   101  		notRepl := expr[:matchRes[occurrence-1][0]]
   102  		// the string will be replaced
   103  		replace := expr[matchRes[occurrence-1][0]:matchRes[occurrence][0]]
   104  		left := expr[matchRes[occurrence][0]:]
   105  		return notRepl + pat.ReplaceAllLiteralString(replace, repl) + left, nil
   106  	}
   107  }
   108  
   109  func RegularReplaceWithArrays(expr, pat, rpls []string, pos, occ []int64, match_type []string, exprN, patN, rplN *nulls.Nulls, resultVector *vector.Vector, proc *process.Process, maxLen int) error {
   110  	rs := make([]string, maxLen)
   111  	var rpl string
   112  	var posValue int64
   113  	var occValue int64
   114  	if len(expr) == 1 && len(pat) == 1 {
   115  		reg, err := regexp.Compile(pat[0])
   116  		if err != nil {
   117  			return moerr.NewInvalidArgNoCtx("regexp_replace have invalid regexp pattern arg", pat)
   118  		}
   119  		for i := 0; i < maxLen; i++ {
   120  			if determineNulls(expr, pat, rpls, exprN, patN, rplN, i) {
   121  				nulls.Add(resultVector.Nsp, uint64(i))
   122  				continue
   123  			}
   124  			rpl, posValue, occValue = determineValuesWithThree(rpls, pos, occ, i)
   125  			res, err := RegularReplaceWithReg(expr[0], reg, rpl, posValue, occValue, match_type[0])
   126  			if err != nil {
   127  				return err
   128  			}
   129  			rs[i] = res
   130  		}
   131  		vector.AppendString(resultVector, rs, proc.Mp())
   132  	} else if len(expr) == 1 {
   133  		for i := 0; i < maxLen; i++ {
   134  			if determineNulls(expr, pat, rpls, exprN, patN, rplN, i) {
   135  				nulls.Add(resultVector.Nsp, uint64(i))
   136  				continue
   137  			}
   138  			rpl, posValue, occValue = determineValuesWithThree(rpls, pos, occ, i)
   139  			res, err := RegularReplace(expr[0], pat[i], rpl, posValue, occValue, match_type[0])
   140  			if err != nil {
   141  				return err
   142  			}
   143  			rs[i] = res
   144  		}
   145  		vector.AppendString(resultVector, rs, proc.Mp())
   146  	} else if len(pat) == 1 {
   147  		reg, err := regexp.Compile(pat[0])
   148  		if err != nil {
   149  			return moerr.NewInvalidArgNoCtx("regexp_replace have invalid regexp pattern arg", pat)
   150  		}
   151  		for i := 0; i < maxLen; i++ {
   152  			if determineNulls(expr, pat, rpls, exprN, patN, rplN, i) {
   153  				nulls.Add(resultVector.Nsp, uint64(i))
   154  				continue
   155  			}
   156  			rpl, posValue, occValue = determineValuesWithThree(rpls, pos, occ, i)
   157  			res, err := RegularReplaceWithReg(expr[i], reg, rpl, posValue, occValue, match_type[0])
   158  			if err != nil {
   159  				return err
   160  			}
   161  			rs[i] = res
   162  		}
   163  		vector.AppendString(resultVector, rs, proc.Mp())
   164  	} else {
   165  		for i := 0; i < maxLen; i++ {
   166  			if determineNulls(expr, pat, rpls, exprN, patN, rplN, i) {
   167  				nulls.Add(resultVector.Nsp, uint64(i))
   168  				continue
   169  			}
   170  			rpl, posValue, occValue = determineValuesWithThree(rpls, pos, occ, i)
   171  			res, err := RegularReplace(expr[i], pat[i], rpl, posValue, occValue, match_type[0])
   172  			if err != nil {
   173  				return err
   174  			}
   175  			rs[i] = res
   176  		}
   177  		vector.AppendString(resultVector, rs, proc.Mp())
   178  	}
   179  	return nil
   180  }
   181  
   182  func ReplaceWithArrays(expr, subs, rpls []string, exprN, subsN, rplN *nulls.Nulls, resultVector *vector.Vector, proc *process.Process, maxLen int) error {
   183  	rs := make([]string, maxLen)
   184  	if len(expr) == 1 && len(subs) == 1 {
   185  		for i := 0; i < maxLen; i++ {
   186  			if determineNulls(expr, subs, rpls, exprN, subsN, rplN, i) {
   187  				nulls.Add(resultVector.Nsp, uint64(i))
   188  				continue
   189  			}
   190  			appendRs(expr, subs, rpls, rs, 0, 0, i)
   191  		}
   192  		vector.AppendString(resultVector, rs, proc.Mp())
   193  	} else if len(expr) == 1 {
   194  		for i := 0; i < maxLen; i++ {
   195  			if determineNulls(expr, subs, rpls, exprN, subsN, rplN, i) {
   196  				nulls.Add(resultVector.Nsp, uint64(i))
   197  				continue
   198  			}
   199  			appendRs(expr, subs, rpls, rs, 0, i, i)
   200  		}
   201  		vector.AppendString(resultVector, rs, proc.Mp())
   202  	} else if len(subs) == 1 {
   203  		for i := 0; i < maxLen; i++ {
   204  			if determineNulls(expr, subs, rpls, exprN, subsN, rplN, i) {
   205  				nulls.Add(resultVector.Nsp, uint64(i))
   206  				continue
   207  			}
   208  			appendRs(expr, subs, rpls, rs, i, 0, i)
   209  		}
   210  		vector.AppendString(resultVector, rs, proc.Mp())
   211  	} else {
   212  		for i := 0; i < maxLen; i++ {
   213  			if determineNulls(expr, subs, rpls, exprN, subsN, rplN, i) {
   214  				nulls.Add(resultVector.Nsp, uint64(i))
   215  				continue
   216  			}
   217  			appendRs(expr, subs, rpls, rs, i, i, i)
   218  		}
   219  		vector.AppendString(resultVector, rs, proc.Mp())
   220  	}
   221  	return nil
   222  }
   223  
   224  func appendRs(expr, subs, rpls, rs []string, ei, si, ri int) {
   225  	var rpl string
   226  	if len(rpls) == 1 {
   227  		rpl = rpls[0]
   228  	} else {
   229  		rpl = rpls[ri]
   230  	}
   231  
   232  	if subs[si] == "" {
   233  		rs[ri] = expr[ei]
   234  	} else {
   235  		rs[ri] = strings.ReplaceAll(expr[ei], subs[si], rpl)
   236  	}
   237  }
   238  
   239  func determineNulls(expr, pat, rpls []string, exprN, patN, rplN *nulls.Nulls, i int) bool {
   240  	var exprIndex int
   241  	var patIndex int
   242  	var rplIndex int
   243  
   244  	if len(expr) == 1 {
   245  		exprIndex = 0
   246  	} else {
   247  		exprIndex = i
   248  	}
   249  
   250  	if len(pat) == 1 {
   251  		patIndex = 0
   252  	} else {
   253  		patIndex = i
   254  	}
   255  
   256  	if len(rpls) == 1 {
   257  		rplIndex = 0
   258  	} else {
   259  		rplIndex = 1
   260  	}
   261  	return nulls.Contains(exprN, uint64(exprIndex)) || nulls.Contains(patN, uint64(patIndex)) || nulls.Contains(rplN, uint64(rplIndex))
   262  }
   263  
   264  func determineValuesWithThree(rpls []string, pos, occ []int64, i int) (string, int64, int64) {
   265  	var rpl string
   266  	var posValue int64
   267  	var occValue int64
   268  
   269  	if len(rpls) == 1 {
   270  		rpl = rpls[0]
   271  	} else {
   272  		rpl = rpls[i]
   273  	}
   274  
   275  	if len(pos) == 1 {
   276  		posValue = pos[0]
   277  	} else {
   278  		posValue = pos[i]
   279  	}
   280  
   281  	if len(occ) == 1 {
   282  		occValue = occ[0]
   283  	} else {
   284  		occValue = occ[i]
   285  	}
   286  
   287  	return rpl, posValue, occValue
   288  }