github.com/matrixorigin/matrixone@v0.7.0/pkg/vectorize/regular/regular_substr.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package regular 16 17 import ( 18 "regexp" 19 20 "github.com/matrixorigin/matrixone/pkg/common/moerr" 21 "github.com/matrixorigin/matrixone/pkg/container/nulls" 22 "github.com/matrixorigin/matrixone/pkg/container/vector" 23 "github.com/matrixorigin/matrixone/pkg/vm/process" 24 ) 25 26 func RegularSubstr(expr, pat string, pos, occurrence int64, match_type string) ([]string, error) { 27 if pos < 1 || occurrence < 1 || pos >= int64(len(expr)) { 28 return nil, moerr.NewInvalidInputNoCtx("regexp_substr have invalid input") 29 } 30 //regular expression pattern 31 reg, err := regexp.Compile(pat) 32 if err != nil { 33 return nil, moerr.NewInvalidArgNoCtx("regexp_substr have invalid regexp pattern arg", pat) 34 } 35 //match result strings 36 matchRes := reg.FindAllString(expr[pos-1:], -1) 37 if matchRes == nil || int64(len(matchRes)) < occurrence { 38 return nil, nil 39 } 40 return matchRes, nil 41 } 42 43 func RegularSubstrWithReg(expr string, pat *regexp.Regexp, pos, occurrence int64, match_type string) ([]string, error) { 44 if pos < 1 || occurrence < 1 || pos >= int64(len(expr)) { 45 return nil, moerr.NewInvalidInputNoCtx("regexp_substr have invalid input") 46 } 47 //match result strings 48 matchRes := pat.FindAllString(expr[pos-1:], -1) 49 if matchRes == nil || int64(len(matchRes)) < occurrence { 50 return nil, nil 51 } 52 return matchRes, nil 53 } 54 55 func RegularSubstrWithArrays(expr, pat []string, pos, occ []int64, match_type []string, exprN, patN *nulls.Nulls, resultVector *vector.Vector, proc *process.Process, maxLen int) error { 56 rs := make([]string, maxLen) 57 var posValue int64 58 var occValue int64 59 if len(expr) == 1 && len(pat) == 1 { 60 reg, err := regexp.Compile(pat[0]) 61 if err != nil { 62 return moerr.NewInvalidArgNoCtx("regexp_substr have invalid regexp pattern arg", pat) 63 } 64 for i := 0; i < maxLen; i++ { 65 if nulls.Contains(exprN, uint64(0)) || nulls.Contains(patN, uint64(0)) || pat[0] == "" { 66 nulls.Add(resultVector.Nsp, uint64(i)) 67 continue 68 } 69 posValue, occValue = determineValuesWithTwo(pos, occ, i) 70 res, err := RegularSubstrWithReg(expr[0], reg, posValue, occValue, match_type[0]) 71 if err != nil { 72 return err 73 } 74 if res == nil { 75 nulls.Add(resultVector.Nsp, uint64(i)) 76 continue 77 } 78 rs[i] = res[occValue-1] 79 } 80 vector.AppendString(resultVector, rs, proc.Mp()) 81 } else if len(expr) == 1 { 82 for i := 0; i < maxLen; i++ { 83 if nulls.Contains(exprN, uint64(0)) || nulls.Contains(patN, uint64(i)) || pat[i] == "" { 84 nulls.Add(resultVector.Nsp, uint64(i)) 85 continue 86 } 87 posValue, occValue = determineValuesWithTwo(pos, occ, i) 88 res, err := RegularSubstr(expr[0], pat[i], posValue, occValue, match_type[0]) 89 if err != nil { 90 return err 91 } 92 if res == nil { 93 nulls.Add(resultVector.Nsp, uint64(i)) 94 continue 95 } 96 rs[i] = res[occValue-1] 97 } 98 vector.AppendString(resultVector, rs, proc.Mp()) 99 } else if len(pat) == 1 { 100 reg, err := regexp.Compile(pat[0]) 101 if err != nil { 102 return moerr.NewInvalidArgNoCtx("regexp_substr have invalid regexp pattern arg", pat) 103 } 104 for i := 0; i < maxLen; i++ { 105 if nulls.Contains(exprN, uint64(i)) || nulls.Contains(patN, uint64(0)) || pat[0] == "" { 106 nulls.Add(resultVector.Nsp, uint64(i)) 107 continue 108 } 109 posValue, occValue = determineValuesWithTwo(pos, occ, i) 110 res, err := RegularSubstrWithReg(expr[i], reg, posValue, occValue, match_type[0]) 111 if err != nil { 112 return err 113 } 114 if res == nil { 115 nulls.Add(resultVector.Nsp, uint64(i)) 116 continue 117 } 118 rs[i] = res[occValue-1] 119 } 120 vector.AppendString(resultVector, rs, proc.Mp()) 121 } else { 122 for i := 0; i < maxLen; i++ { 123 if nulls.Contains(exprN, uint64(i)) || nulls.Contains(patN, uint64(i)) || pat[i] == "" { 124 nulls.Add(resultVector.Nsp, uint64(i)) 125 continue 126 } 127 posValue, occValue = determineValuesWithTwo(pos, occ, i) 128 res, err := RegularSubstr(expr[0], pat[i], posValue, occValue, match_type[0]) 129 if err != nil { 130 return err 131 } 132 if res == nil { 133 nulls.Add(resultVector.Nsp, uint64(i)) 134 continue 135 } 136 rs[i] = res[occValue-1] 137 } 138 vector.AppendString(resultVector, rs, proc.Mp()) 139 } 140 return nil 141 } 142 143 func determineValuesWithTwo(pos, occ []int64, i int) (int64, int64) { 144 var posValue int64 145 var occValue int64 146 147 if len(pos) == 1 { 148 posValue = pos[0] 149 } else { 150 posValue = pos[i] 151 } 152 153 if len(occ) == 1 { 154 occValue = occ[0] 155 } else { 156 occValue = occ[i] 157 } 158 159 return posValue, occValue 160 }