github.com/matrixorigin/matrixone@v0.7.0/pkg/vectorize/regular/regular_instr.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package regular 16 17 import ( 18 "regexp" 19 20 "github.com/matrixorigin/matrixone/pkg/common/moerr" 21 "github.com/matrixorigin/matrixone/pkg/container/nulls" 22 ) 23 24 func RegularInstr(expr, pat string, pos, occurrence int64, return_option uint8, match_type string) (int64, error) { 25 if pos < 1 || occurrence < 1 || (return_option != 0 && return_option != 1) || pos >= int64(len(expr)) { 26 return 0, moerr.NewInvalidInputNoCtx("regexp_instr have invalid input") 27 } 28 //regular expression pattern 29 reg, err := regexp.Compile(pat) 30 if err != nil { 31 return 0, moerr.NewInvalidArgNoCtx("regexp_instr have invalid regexp pattern arg", pat) 32 } 33 //match result indexs 34 matchRes := reg.FindAllStringIndex(expr, -1) 35 if matchRes == nil { 36 return 0, nil 37 } 38 //find the match position 39 index := 0 40 for int64(matchRes[index][0]) < pos-1 { 41 index++ 42 if index == len(matchRes) { 43 return 0, nil 44 } 45 } 46 47 matchRes = matchRes[index:] 48 if int64(len(matchRes)) < occurrence { 49 return 0, nil 50 } 51 52 if return_option == 0 { 53 return int64(matchRes[occurrence-1][0] + 1), nil 54 } else { 55 return int64(matchRes[occurrence-1][1] + 1), nil 56 } 57 } 58 59 func RegularInstrWithReg(expr string, pat *regexp.Regexp, pos, occurrence int64, return_option uint8, match_type string) (int64, error) { 60 if pos < 1 || occurrence < 1 || (return_option != 0 && return_option != 1) || pos >= int64(len(expr)) { 61 return 0, moerr.NewInvalidInputNoCtx("regexp_instr have invalid input") 62 } 63 //match result indexs 64 matchRes := pat.FindAllStringIndex(expr, -1) 65 if matchRes == nil { 66 return 0, nil 67 } 68 //find the match position 69 index := 0 70 for int64(matchRes[index][0]) < pos-1 { 71 index++ 72 if index == len(matchRes) { 73 return 0, nil 74 } 75 } 76 matchRes = matchRes[index:] 77 if int64(len(matchRes)) < occurrence { 78 return 0, nil 79 } 80 81 if return_option == 0 { 82 return int64(matchRes[occurrence-1][0] + 1), nil 83 } else { 84 return int64(matchRes[occurrence-1][1] + 1), nil 85 } 86 } 87 88 func RegularInstrWithArrays(expr, pat []string, pos, occ []int64, return_option []uint8, match_type []string, exprN, patN, rns *nulls.Nulls, rs []int64, maxLen int) error { 89 var posValue int64 90 var occValue int64 91 var optValue uint8 92 if len(expr) == 1 && len(pat) == 1 { 93 reg, err := regexp.Compile(pat[0]) 94 if err != nil { 95 return moerr.NewInvalidArgNoCtx("regexp_instr have invalid regexp pattern arg", pat) 96 } 97 for i := 0; i < maxLen; i++ { 98 if nulls.Contains(exprN, uint64(0)) || nulls.Contains(patN, uint64(0)) { 99 nulls.Add(rns, uint64(i)) 100 continue 101 } 102 posValue, occValue, optValue = determineValues(pos, occ, return_option, i) 103 res, err := RegularInstrWithReg(expr[0], reg, posValue, occValue, optValue, match_type[0]) 104 if err != nil { 105 return err 106 } 107 rs[i] = res 108 } 109 } else if len(expr) == 1 { 110 for i := 0; i < maxLen; i++ { 111 if nulls.Contains(exprN, uint64(0)) || nulls.Contains(patN, uint64(i)) { 112 nulls.Add(rns, uint64(i)) 113 continue 114 } 115 posValue, occValue, optValue = determineValues(pos, occ, return_option, i) 116 res, err := RegularInstr(expr[0], pat[i], posValue, occValue, optValue, match_type[0]) 117 if err != nil { 118 return err 119 } 120 rs[i] = res 121 } 122 } else if len(pat) == 1 { 123 reg, err := regexp.Compile(pat[0]) 124 if err != nil { 125 return moerr.NewInvalidArgNoCtx("regexp_instr have invalid regexp pattern arg", pat) 126 } 127 for i := 0; i < maxLen; i++ { 128 if nulls.Contains(exprN, uint64(i)) || nulls.Contains(patN, uint64(0)) { 129 nulls.Add(rns, uint64(i)) 130 continue 131 } 132 posValue, occValue, optValue = determineValues(pos, occ, return_option, i) 133 res, err := RegularInstrWithReg(expr[i], reg, posValue, occValue, optValue, match_type[0]) 134 if err != nil { 135 return err 136 } 137 rs[i] = res 138 } 139 } else { 140 for i := 0; i < maxLen; i++ { 141 if nulls.Contains(exprN, uint64(i)) || nulls.Contains(patN, uint64(i)) { 142 nulls.Add(rns, uint64(i)) 143 continue 144 } 145 posValue, occValue, optValue = determineValues(pos, occ, return_option, i) 146 res, err := RegularInstr(expr[i], pat[i], posValue, occValue, optValue, match_type[0]) 147 if err != nil { 148 return err 149 } 150 rs[i] = res 151 } 152 } 153 return nil 154 } 155 156 func determineValues(pos, occ []int64, return_option []uint8, i int) (int64, int64, uint8) { 157 var posValue int64 158 var occValue int64 159 var optValue uint8 160 if len(pos) == 1 { 161 posValue = pos[0] 162 } else { 163 posValue = pos[i] 164 } 165 166 if len(occ) == 1 { 167 occValue = occ[0] 168 } else { 169 occValue = occ[i] 170 } 171 172 if len(return_option) == 1 { 173 optValue = return_option[0] 174 } else { 175 optValue = return_option[i] 176 } 177 178 return posValue, occValue, optValue 179 }