github.com/matrixorigin/matrixone@v0.7.0/pkg/vectorize/regular/regular_instr.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package regular
    16  
    17  import (
    18  	"regexp"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    21  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    22  )
    23  
    24  func RegularInstr(expr, pat string, pos, occurrence int64, return_option uint8, match_type string) (int64, error) {
    25  	if pos < 1 || occurrence < 1 || (return_option != 0 && return_option != 1) || pos >= int64(len(expr)) {
    26  		return 0, moerr.NewInvalidInputNoCtx("regexp_instr have invalid input")
    27  	}
    28  	//regular expression pattern
    29  	reg, err := regexp.Compile(pat)
    30  	if err != nil {
    31  		return 0, moerr.NewInvalidArgNoCtx("regexp_instr have invalid regexp pattern arg", pat)
    32  	}
    33  	//match result indexs
    34  	matchRes := reg.FindAllStringIndex(expr, -1)
    35  	if matchRes == nil {
    36  		return 0, nil
    37  	}
    38  	//find the match position
    39  	index := 0
    40  	for int64(matchRes[index][0]) < pos-1 {
    41  		index++
    42  		if index == len(matchRes) {
    43  			return 0, nil
    44  		}
    45  	}
    46  
    47  	matchRes = matchRes[index:]
    48  	if int64(len(matchRes)) < occurrence {
    49  		return 0, nil
    50  	}
    51  
    52  	if return_option == 0 {
    53  		return int64(matchRes[occurrence-1][0] + 1), nil
    54  	} else {
    55  		return int64(matchRes[occurrence-1][1] + 1), nil
    56  	}
    57  }
    58  
    59  func RegularInstrWithReg(expr string, pat *regexp.Regexp, pos, occurrence int64, return_option uint8, match_type string) (int64, error) {
    60  	if pos < 1 || occurrence < 1 || (return_option != 0 && return_option != 1) || pos >= int64(len(expr)) {
    61  		return 0, moerr.NewInvalidInputNoCtx("regexp_instr have invalid input")
    62  	}
    63  	//match result indexs
    64  	matchRes := pat.FindAllStringIndex(expr, -1)
    65  	if matchRes == nil {
    66  		return 0, nil
    67  	}
    68  	//find the match position
    69  	index := 0
    70  	for int64(matchRes[index][0]) < pos-1 {
    71  		index++
    72  		if index == len(matchRes) {
    73  			return 0, nil
    74  		}
    75  	}
    76  	matchRes = matchRes[index:]
    77  	if int64(len(matchRes)) < occurrence {
    78  		return 0, nil
    79  	}
    80  
    81  	if return_option == 0 {
    82  		return int64(matchRes[occurrence-1][0] + 1), nil
    83  	} else {
    84  		return int64(matchRes[occurrence-1][1] + 1), nil
    85  	}
    86  }
    87  
    88  func RegularInstrWithArrays(expr, pat []string, pos, occ []int64, return_option []uint8, match_type []string, exprN, patN, rns *nulls.Nulls, rs []int64, maxLen int) error {
    89  	var posValue int64
    90  	var occValue int64
    91  	var optValue uint8
    92  	if len(expr) == 1 && len(pat) == 1 {
    93  		reg, err := regexp.Compile(pat[0])
    94  		if err != nil {
    95  			return moerr.NewInvalidArgNoCtx("regexp_instr have invalid regexp pattern arg", pat)
    96  		}
    97  		for i := 0; i < maxLen; i++ {
    98  			if nulls.Contains(exprN, uint64(0)) || nulls.Contains(patN, uint64(0)) {
    99  				nulls.Add(rns, uint64(i))
   100  				continue
   101  			}
   102  			posValue, occValue, optValue = determineValues(pos, occ, return_option, i)
   103  			res, err := RegularInstrWithReg(expr[0], reg, posValue, occValue, optValue, match_type[0])
   104  			if err != nil {
   105  				return err
   106  			}
   107  			rs[i] = res
   108  		}
   109  	} else if len(expr) == 1 {
   110  		for i := 0; i < maxLen; i++ {
   111  			if nulls.Contains(exprN, uint64(0)) || nulls.Contains(patN, uint64(i)) {
   112  				nulls.Add(rns, uint64(i))
   113  				continue
   114  			}
   115  			posValue, occValue, optValue = determineValues(pos, occ, return_option, i)
   116  			res, err := RegularInstr(expr[0], pat[i], posValue, occValue, optValue, match_type[0])
   117  			if err != nil {
   118  				return err
   119  			}
   120  			rs[i] = res
   121  		}
   122  	} else if len(pat) == 1 {
   123  		reg, err := regexp.Compile(pat[0])
   124  		if err != nil {
   125  			return moerr.NewInvalidArgNoCtx("regexp_instr have invalid regexp pattern arg", pat)
   126  		}
   127  		for i := 0; i < maxLen; i++ {
   128  			if nulls.Contains(exprN, uint64(i)) || nulls.Contains(patN, uint64(0)) {
   129  				nulls.Add(rns, uint64(i))
   130  				continue
   131  			}
   132  			posValue, occValue, optValue = determineValues(pos, occ, return_option, i)
   133  			res, err := RegularInstrWithReg(expr[i], reg, posValue, occValue, optValue, match_type[0])
   134  			if err != nil {
   135  				return err
   136  			}
   137  			rs[i] = res
   138  		}
   139  	} else {
   140  		for i := 0; i < maxLen; i++ {
   141  			if nulls.Contains(exprN, uint64(i)) || nulls.Contains(patN, uint64(i)) {
   142  				nulls.Add(rns, uint64(i))
   143  				continue
   144  			}
   145  			posValue, occValue, optValue = determineValues(pos, occ, return_option, i)
   146  			res, err := RegularInstr(expr[i], pat[i], posValue, occValue, optValue, match_type[0])
   147  			if err != nil {
   148  				return err
   149  			}
   150  			rs[i] = res
   151  		}
   152  	}
   153  	return nil
   154  }
   155  
   156  func determineValues(pos, occ []int64, return_option []uint8, i int) (int64, int64, uint8) {
   157  	var posValue int64
   158  	var occValue int64
   159  	var optValue uint8
   160  	if len(pos) == 1 {
   161  		posValue = pos[0]
   162  	} else {
   163  		posValue = pos[i]
   164  	}
   165  
   166  	if len(occ) == 1 {
   167  		occValue = occ[0]
   168  	} else {
   169  		occValue = occ[i]
   170  	}
   171  
   172  	if len(return_option) == 1 {
   173  		optValue = return_option[0]
   174  	} else {
   175  		optValue = return_option[i]
   176  	}
   177  
   178  	return posValue, occValue, optValue
   179  }