github.com/matrixorigin/matrixone@v0.7.0/pkg/vectorize/regular/regular_replace.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package regular 16 17 import ( 18 "regexp" 19 "strings" 20 21 "github.com/matrixorigin/matrixone/pkg/common/moerr" 22 "github.com/matrixorigin/matrixone/pkg/container/nulls" 23 "github.com/matrixorigin/matrixone/pkg/container/vector" 24 "github.com/matrixorigin/matrixone/pkg/vm/process" 25 ) 26 27 func RegularReplace(expr, pat, repl string, pos, occurrence int64, match_type string) (string, error) { 28 if pos < 1 || occurrence < 0 || pos >= int64(len(expr)) { 29 return expr, moerr.NewInvalidInputNoCtx("regexp_replace have invalid input") 30 } 31 //regular expression pattern 32 reg, err := regexp.Compile(pat) 33 if err != nil { 34 return "", moerr.NewInvalidArgNoCtx("regexp_replace have invalid regexp pattern arg", pat) 35 } 36 //match result indexs 37 matchRes := reg.FindAllStringIndex(expr, -1) 38 if matchRes == nil { 39 return expr, nil 40 } //find the match position 41 index := 0 42 for int64(matchRes[index][0]) < pos-1 { 43 index++ 44 if index == len(matchRes) { 45 return expr, nil 46 } 47 } 48 matchRes = matchRes[index:] 49 if int64(len(matchRes)) < occurrence { 50 return expr, nil 51 } 52 if occurrence == 0 { 53 return reg.ReplaceAllLiteralString(expr, repl), nil 54 } else if occurrence == int64(len(matchRes)) { 55 // the string won't be replaced 56 notRepl := expr[:matchRes[occurrence-1][0]] 57 // the string will be replaced 58 replace := expr[matchRes[occurrence-1][0]:] 59 return notRepl + reg.ReplaceAllLiteralString(replace, repl), nil 60 } else { 61 // the string won't be replaced 62 notRepl := expr[:matchRes[occurrence-1][0]] 63 // the string will be replaced 64 replace := expr[matchRes[occurrence-1][0]:matchRes[occurrence][0]] 65 left := expr[matchRes[occurrence][0]:] 66 return notRepl + reg.ReplaceAllLiteralString(replace, repl) + left, nil 67 } 68 } 69 70 func RegularReplaceWithReg(expr string, pat *regexp.Regexp, repl string, pos, occurrence int64, match_type string) (string, error) { 71 if pos < 1 || occurrence < 0 || pos >= int64(len(expr)) { 72 return expr, moerr.NewInvalidInputNoCtx("regexp_replace have invalid input") 73 } 74 //match result indexs 75 matchRes := pat.FindAllStringIndex(expr, -1) 76 if matchRes == nil { 77 return expr, nil 78 } //find the match position 79 index := 0 80 for int64(matchRes[index][0]) < pos-1 { 81 index++ 82 if index == len(matchRes) { 83 return expr, nil 84 } 85 } 86 matchRes = matchRes[index:] 87 if int64(len(matchRes)) < occurrence { 88 return expr, nil 89 } 90 91 if occurrence == 0 { 92 return pat.ReplaceAllLiteralString(expr, repl), nil 93 } else if occurrence == int64(len(matchRes)) { 94 // the string won't be replaced 95 notRepl := expr[:matchRes[occurrence-1][0]] 96 // the string will be replaced 97 replace := expr[matchRes[occurrence-1][0]:] 98 return notRepl + pat.ReplaceAllLiteralString(replace, repl), nil 99 } else { 100 // the string won't be replaced 101 notRepl := expr[:matchRes[occurrence-1][0]] 102 // the string will be replaced 103 replace := expr[matchRes[occurrence-1][0]:matchRes[occurrence][0]] 104 left := expr[matchRes[occurrence][0]:] 105 return notRepl + pat.ReplaceAllLiteralString(replace, repl) + left, nil 106 } 107 } 108 109 func RegularReplaceWithArrays(expr, pat, rpls []string, pos, occ []int64, match_type []string, exprN, patN, rplN *nulls.Nulls, resultVector *vector.Vector, proc *process.Process, maxLen int) error { 110 rs := make([]string, maxLen) 111 var rpl string 112 var posValue int64 113 var occValue int64 114 if len(expr) == 1 && len(pat) == 1 { 115 reg, err := regexp.Compile(pat[0]) 116 if err != nil { 117 return moerr.NewInvalidArgNoCtx("regexp_replace have invalid regexp pattern arg", pat) 118 } 119 for i := 0; i < maxLen; i++ { 120 if determineNulls(expr, pat, rpls, exprN, patN, rplN, i) { 121 nulls.Add(resultVector.Nsp, uint64(i)) 122 continue 123 } 124 rpl, posValue, occValue = determineValuesWithThree(rpls, pos, occ, i) 125 res, err := RegularReplaceWithReg(expr[0], reg, rpl, posValue, occValue, match_type[0]) 126 if err != nil { 127 return err 128 } 129 rs[i] = res 130 } 131 vector.AppendString(resultVector, rs, proc.Mp()) 132 } else if len(expr) == 1 { 133 for i := 0; i < maxLen; i++ { 134 if determineNulls(expr, pat, rpls, exprN, patN, rplN, i) { 135 nulls.Add(resultVector.Nsp, uint64(i)) 136 continue 137 } 138 rpl, posValue, occValue = determineValuesWithThree(rpls, pos, occ, i) 139 res, err := RegularReplace(expr[0], pat[i], rpl, posValue, occValue, match_type[0]) 140 if err != nil { 141 return err 142 } 143 rs[i] = res 144 } 145 vector.AppendString(resultVector, rs, proc.Mp()) 146 } else if len(pat) == 1 { 147 reg, err := regexp.Compile(pat[0]) 148 if err != nil { 149 return moerr.NewInvalidArgNoCtx("regexp_replace have invalid regexp pattern arg", pat) 150 } 151 for i := 0; i < maxLen; i++ { 152 if determineNulls(expr, pat, rpls, exprN, patN, rplN, i) { 153 nulls.Add(resultVector.Nsp, uint64(i)) 154 continue 155 } 156 rpl, posValue, occValue = determineValuesWithThree(rpls, pos, occ, i) 157 res, err := RegularReplaceWithReg(expr[i], reg, rpl, posValue, occValue, match_type[0]) 158 if err != nil { 159 return err 160 } 161 rs[i] = res 162 } 163 vector.AppendString(resultVector, rs, proc.Mp()) 164 } else { 165 for i := 0; i < maxLen; i++ { 166 if determineNulls(expr, pat, rpls, exprN, patN, rplN, i) { 167 nulls.Add(resultVector.Nsp, uint64(i)) 168 continue 169 } 170 rpl, posValue, occValue = determineValuesWithThree(rpls, pos, occ, i) 171 res, err := RegularReplace(expr[i], pat[i], rpl, posValue, occValue, match_type[0]) 172 if err != nil { 173 return err 174 } 175 rs[i] = res 176 } 177 vector.AppendString(resultVector, rs, proc.Mp()) 178 } 179 return nil 180 } 181 182 func ReplaceWithArrays(expr, subs, rpls []string, exprN, subsN, rplN *nulls.Nulls, resultVector *vector.Vector, proc *process.Process, maxLen int) error { 183 rs := make([]string, maxLen) 184 if len(expr) == 1 && len(subs) == 1 { 185 for i := 0; i < maxLen; i++ { 186 if determineNulls(expr, subs, rpls, exprN, subsN, rplN, i) { 187 nulls.Add(resultVector.Nsp, uint64(i)) 188 continue 189 } 190 appendRs(expr, subs, rpls, rs, 0, 0, i) 191 } 192 vector.AppendString(resultVector, rs, proc.Mp()) 193 } else if len(expr) == 1 { 194 for i := 0; i < maxLen; i++ { 195 if determineNulls(expr, subs, rpls, exprN, subsN, rplN, i) { 196 nulls.Add(resultVector.Nsp, uint64(i)) 197 continue 198 } 199 appendRs(expr, subs, rpls, rs, 0, i, i) 200 } 201 vector.AppendString(resultVector, rs, proc.Mp()) 202 } else if len(subs) == 1 { 203 for i := 0; i < maxLen; i++ { 204 if determineNulls(expr, subs, rpls, exprN, subsN, rplN, i) { 205 nulls.Add(resultVector.Nsp, uint64(i)) 206 continue 207 } 208 appendRs(expr, subs, rpls, rs, i, 0, i) 209 } 210 vector.AppendString(resultVector, rs, proc.Mp()) 211 } else { 212 for i := 0; i < maxLen; i++ { 213 if determineNulls(expr, subs, rpls, exprN, subsN, rplN, i) { 214 nulls.Add(resultVector.Nsp, uint64(i)) 215 continue 216 } 217 appendRs(expr, subs, rpls, rs, i, i, i) 218 } 219 vector.AppendString(resultVector, rs, proc.Mp()) 220 } 221 return nil 222 } 223 224 func appendRs(expr, subs, rpls, rs []string, ei, si, ri int) { 225 var rpl string 226 if len(rpls) == 1 { 227 rpl = rpls[0] 228 } else { 229 rpl = rpls[ri] 230 } 231 232 if subs[si] == "" { 233 rs[ri] = expr[ei] 234 } else { 235 rs[ri] = strings.ReplaceAll(expr[ei], subs[si], rpl) 236 } 237 } 238 239 func determineNulls(expr, pat, rpls []string, exprN, patN, rplN *nulls.Nulls, i int) bool { 240 var exprIndex int 241 var patIndex int 242 var rplIndex int 243 244 if len(expr) == 1 { 245 exprIndex = 0 246 } else { 247 exprIndex = i 248 } 249 250 if len(pat) == 1 { 251 patIndex = 0 252 } else { 253 patIndex = i 254 } 255 256 if len(rpls) == 1 { 257 rplIndex = 0 258 } else { 259 rplIndex = 1 260 } 261 return nulls.Contains(exprN, uint64(exprIndex)) || nulls.Contains(patN, uint64(patIndex)) || nulls.Contains(rplN, uint64(rplIndex)) 262 } 263 264 func determineValuesWithThree(rpls []string, pos, occ []int64, i int) (string, int64, int64) { 265 var rpl string 266 var posValue int64 267 var occValue int64 268 269 if len(rpls) == 1 { 270 rpl = rpls[0] 271 } else { 272 rpl = rpls[i] 273 } 274 275 if len(pos) == 1 { 276 posValue = pos[0] 277 } else { 278 posValue = pos[i] 279 } 280 281 if len(occ) == 1 { 282 occValue = occ[0] 283 } else { 284 occValue = occ[i] 285 } 286 287 return rpl, posValue, occValue 288 }