github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/plan/function/builtin/multi/substr.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package multi 16 17 import ( 18 "context" 19 "math" 20 21 "github.com/matrixorigin/matrixone/pkg/common/moerr" 22 "github.com/matrixorigin/matrixone/pkg/container/nulls" 23 "github.com/matrixorigin/matrixone/pkg/container/types" 24 "github.com/matrixorigin/matrixone/pkg/container/vector" 25 "github.com/matrixorigin/matrixone/pkg/vectorize/substring" 26 "github.com/matrixorigin/matrixone/pkg/vm/process" 27 ) 28 29 // Cast, cast ... sigh. 30 func castConstAsInt64(ctx context.Context, vec *vector.Vector, idx int64) (int64, error) { 31 switch vec.GetType().Oid { 32 case types.T_uint8: 33 return int64(vector.GetValueAt[uint8](vec, idx)), nil 34 case types.T_uint16: 35 return int64(vector.GetValueAt[uint16](vec, idx)), nil 36 case types.T_uint32: 37 return int64(vector.GetValueAt[uint32](vec, idx)), nil 38 case types.T_uint64: 39 val := vector.GetValueAt[uint64](vec, idx) 40 if val > uint64(math.MaxInt64) { 41 return 0, moerr.NewInvalidArg(ctx, "function substring(str, start, lenth)", val) 42 } 43 return int64(val), nil 44 case types.T_int8: 45 return int64(vector.GetValueAt[int8](vec, idx)), nil 46 case types.T_int16: 47 return int64(vector.GetValueAt[int16](vec, idx)), nil 48 case types.T_int32: 49 return int64(vector.GetValueAt[int32](vec, idx)), nil 50 case types.T_int64: 51 return int64(vector.GetValueAt[int64](vec, idx)), nil 52 case types.T_float32: 53 return int64(vector.GetValueAt[float32](vec, idx)), nil 54 case types.T_float64: 55 val := vector.GetValueAt[float64](vec, idx) 56 if val > float64(math.MaxInt64) { 57 return 0, moerr.NewInvalidArg(ctx, "function substring(str, start, lenth)", val) 58 } 59 return int64(val), nil 60 default: 61 panic("castConstAsInt64 failed, unknown type") 62 } 63 } 64 65 func numSliceToI64[T types.BuiltinNumber](input []T) []int64 { 66 ret := make([]int64, len(input)) 67 for i, v := range input { 68 ret[i] = int64(v) 69 } 70 return ret 71 } 72 73 func castTVecAsInt64(vec *vector.Vector) []int64 { 74 switch vec.GetType().Oid { 75 case types.T_uint8: 76 return numSliceToI64(vector.GetFixedVectorValues[uint8](vec)) 77 case types.T_uint16: 78 return numSliceToI64(vector.GetFixedVectorValues[uint16](vec)) 79 case types.T_uint32: 80 return numSliceToI64(vector.GetFixedVectorValues[uint32](vec)) 81 case types.T_uint64: 82 return numSliceToI64(vector.GetFixedVectorValues[uint64](vec)) 83 case types.T_int8: 84 return numSliceToI64(vector.GetFixedVectorValues[int8](vec)) 85 case types.T_int16: 86 return numSliceToI64(vector.GetFixedVectorValues[int16](vec)) 87 case types.T_int32: 88 return numSliceToI64(vector.GetFixedVectorValues[int32](vec)) 89 case types.T_int64: 90 return numSliceToI64(vector.GetFixedVectorValues[int64](vec)) 91 case types.T_float32: 92 return numSliceToI64(vector.GetFixedVectorValues[float32](vec)) 93 case types.T_float64: 94 return numSliceToI64(vector.GetFixedVectorValues[float64](vec)) 95 default: 96 panic("castTVecAsInt64 failed, unknown type") 97 } 98 } 99 100 // XXX Unless I mis read the code, substring simply does the following 101 // columnSrcCol := vector.MustStrCols(srcVector) 102 // columnStartCol := castTVecAsInt64(startVector) 103 // columnLengthCol := castTVecAsInt64(lengthVector) 104 // cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar(), inputVecs[2].IsScalar()} 105 // substring.SubstringDynamicOffsetBounded(columnSrcCol, results, columnStartCol, columnLengthCol, cs) 106 // return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp), nil 107 // What are we doing here? 108 109 func Substring(inputVecs []*vector.Vector, proc *process.Process) (*vector.Vector, error) { 110 // get the number of substr function parameters 111 var paramNum = len(inputVecs) 112 srcVector := inputVecs[0] 113 startVector := inputVecs[1] 114 // Substr function has no length parameter 115 if paramNum == 2 { 116 if srcVector.IsScalarNull() || startVector.IsScalarNull() { 117 return proc.AllocScalarNullVector(types.Type{Oid: types.T_char, Size: 24}), nil 118 } 119 } else { //Substring column with length parameter 120 lengthVector := inputVecs[2] 121 if srcVector.IsScalarNull() || startVector.IsScalarNull() || lengthVector.IsScalarNull() { 122 return proc.AllocScalarNullVector(types.Type{Oid: types.T_char, Size: 24}), nil 123 } 124 } 125 if srcVector.IsScalar() { 126 return substrSrcConst(inputVecs, proc) 127 } else { 128 return substrSrcCol(inputVecs, proc) 129 } 130 } 131 132 // substring first parameter is constant 133 func substrSrcConst(inputVecs []*vector.Vector, proc *process.Process) (*vector.Vector, error) { 134 var paramNum = len(inputVecs) 135 srcVector := inputVecs[0] 136 startVector := inputVecs[1] 137 138 if startVector.IsScalarNull() { 139 return proc.AllocConstNullVector(srcVector.Typ, srcVector.Length()), nil 140 } 141 142 // XXX if this vector is const, then it is not expanded. Really? 143 columnSrcCol := vector.MustStrCols(srcVector) 144 145 // request new memory space for result column 146 rows := calcResultVectorRows(inputVecs) 147 results := make([]string, rows) 148 resultNsp := nulls.NewWithSize(rows) 149 150 // set null row 151 if paramNum == 2 { 152 nulls.Or(inputVecs[0].Nsp, inputVecs[1].Nsp, resultNsp) 153 } else { 154 nulls.Or(inputVecs[0].Nsp, inputVecs[1].Nsp, resultNsp) 155 nulls.Or(inputVecs[2].Nsp, resultNsp, resultNsp) 156 } 157 158 if startVector.IsScalar() { 159 if paramNum == 2 { 160 // get start constant value 161 startValue, err := castConstAsInt64(proc.Ctx, startVector, 0) 162 if err != nil { 163 return nil, err 164 } 165 if startValue > 0 { 166 substring.SubstringFromLeftConstOffsetUnbounded(columnSrcCol, results, startValue-1) 167 } else if startValue < 0 { 168 substring.SubstringFromRightConstOffsetUnbounded(columnSrcCol, results, -startValue) 169 } else { 170 substring.SubstringFromZeroConstOffsetUnbounded(columnSrcCol, results) 171 } 172 return vector.NewConstString(srcVector.Typ, srcVector.Length(), results[0], proc.Mp()), nil 173 } else { //has third parameter 174 lengthVector := inputVecs[2] 175 if lengthVector.IsScalar() { 176 // get start constant value 177 startValue, err := castConstAsInt64(proc.Ctx, startVector, 0) 178 if err != nil { 179 return nil, err 180 } 181 // get length constant value 182 lengthValue, err := castConstAsInt64(proc.Ctx, lengthVector, 0) 183 if err != nil { 184 return nil, err 185 } 186 187 if startValue > 0 { 188 substring.SubstringFromLeftConstOffsetBounded(columnSrcCol, results, startValue-1, lengthValue) 189 } else if startValue < 0 { 190 substring.SubstringFromRightConstOffsetBounded(columnSrcCol, results, -startValue, lengthValue) 191 } else { 192 substring.SubstringFromZeroConstOffsetBounded(columnSrcCol, results) 193 } 194 return vector.NewConstString(srcVector.Typ, srcVector.Length(), results[0], proc.Mp()), nil 195 } else { 196 columnStartCol := castTVecAsInt64(startVector) 197 columnLengthCol := castTVecAsInt64(lengthVector) 198 cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar(), inputVecs[2].IsScalar()} 199 substring.SubstringDynamicOffsetBounded(columnSrcCol, results, columnStartCol, columnLengthCol, cs) 200 return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil 201 } 202 } 203 } else { 204 if paramNum == 2 { 205 //The pos column is a variable or an expression 206 columnStartCol := castTVecAsInt64(inputVecs[1]) 207 cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar()} 208 substring.SubstringDynamicOffsetUnbounded(columnSrcCol, results, columnStartCol, cs) 209 return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil 210 } else { 211 //Substring column with length parameter 212 columnStartCol := castTVecAsInt64(inputVecs[1]) 213 columnLengthCol := castTVecAsInt64(inputVecs[2]) 214 cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar(), inputVecs[2].IsScalar()} 215 substring.SubstringDynamicOffsetBounded(columnSrcCol, results, columnStartCol, columnLengthCol, cs) 216 return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil 217 } 218 } 219 } 220 221 // substring first paramter is column 222 func substrSrcCol(inputVecs []*vector.Vector, proc *process.Process) (*vector.Vector, error) { 223 var paramNum = len(inputVecs) 224 srcVector := inputVecs[0] 225 startVector := inputVecs[1] 226 columnSrcCol := vector.GetStrVectorValues(srcVector) 227 228 // request new memory space for result column 229 results := make([]string, len(columnSrcCol)) 230 231 //set null row 232 resultNsp := nulls.NewWithSize(len(results)) 233 if paramNum == 2 { 234 nulls.Or(inputVecs[0].Nsp, inputVecs[1].Nsp, resultNsp) 235 } else { 236 nulls.Or(inputVecs[0].Nsp, inputVecs[1].Nsp, resultNsp) 237 nulls.Or(inputVecs[2].Nsp, resultNsp, resultNsp) 238 } 239 240 if startVector.IsScalar() { 241 if paramNum == 2 { 242 // get start constant value 243 startValue, err := castConstAsInt64(proc.Ctx, startVector, 0) 244 if err != nil { 245 return nil, err 246 } 247 if startValue > 0 { 248 substring.SubstringFromLeftConstOffsetUnbounded(columnSrcCol, results, startValue-1) 249 } else if startValue < 0 { 250 substring.SubstringFromRightConstOffsetUnbounded(columnSrcCol, results, -startValue) 251 } else { 252 //startValue == 0 253 substring.SubstringFromZeroConstOffsetUnbounded(columnSrcCol, results) 254 } 255 return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil 256 } else { //has third parameter 257 lengthVector := inputVecs[2] 258 // if length parameter is constant 259 if lengthVector.IsScalar() { 260 // get start constant value 261 startValue, err := castConstAsInt64(proc.Ctx, startVector, 0) 262 if err != nil { 263 return nil, err 264 } 265 // get length constant value 266 lengthValue, err := castConstAsInt64(proc.Ctx, lengthVector, 0) 267 if err != nil { 268 return nil, err 269 } 270 if startValue > 0 { 271 substring.SubstringFromLeftConstOffsetBounded(columnSrcCol, results, startValue-1, lengthValue) 272 } else if startValue < 0 { 273 substring.SubstringFromRightConstOffsetBounded(columnSrcCol, results, -startValue, lengthValue) 274 } else { 275 //startValue == 0 276 substring.SubstringFromZeroConstOffsetBounded(columnSrcCol, results) 277 } 278 return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil 279 } else { 280 columnStartCol := castTVecAsInt64(inputVecs[1]) 281 columnLengthCol := castTVecAsInt64(inputVecs[2]) 282 cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar(), inputVecs[2].IsScalar()} 283 substring.SubstringDynamicOffsetBounded(columnSrcCol, results, columnStartCol, columnLengthCol, cs) 284 return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil 285 } 286 } 287 } else { 288 if paramNum == 2 { 289 //The pos column is a variable or an expression 290 columnStartCol := castTVecAsInt64(inputVecs[1]) 291 cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar()} 292 substring.SubstringDynamicOffsetUnbounded(columnSrcCol, results, columnStartCol, cs) 293 return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil 294 } else { 295 columnStartCol := castTVecAsInt64(inputVecs[1]) 296 columnLengthCol := castTVecAsInt64(inputVecs[2]) 297 cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar(), inputVecs[2].IsScalar()} 298 substring.SubstringDynamicOffsetBounded(columnSrcCol, results, columnStartCol, columnLengthCol, cs) 299 return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil 300 } 301 } 302 } 303 304 // calcResultVectorRows : Calculate size of returned result rows, which is used to calculate the memory space required 305 func calcResultVectorRows(inputVecs []*vector.Vector) int { 306 if len(inputVecs) == 2 { 307 if inputVecs[0].IsScalar() && inputVecs[1].IsScalar() { 308 return 1 309 } else { 310 return vector.Length(inputVecs[0]) 311 } 312 } else { 313 if inputVecs[0].IsScalar() && inputVecs[1].IsScalar() && inputVecs[2].IsScalar() { 314 return 1 315 } else { 316 return vector.Length(inputVecs[0]) 317 } 318 } 319 }