github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/plan/function/builtin/multi/substr.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package multi
    16  
    17  import (
    18  	"context"
    19  	"math"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    22  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    23  	"github.com/matrixorigin/matrixone/pkg/container/types"
    24  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    25  	"github.com/matrixorigin/matrixone/pkg/vectorize/substring"
    26  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    27  )
    28  
    29  // Cast, cast ...  sigh.
    30  func castConstAsInt64(ctx context.Context, vec *vector.Vector, idx int64) (int64, error) {
    31  	switch vec.GetType().Oid {
    32  	case types.T_uint8:
    33  		return int64(vector.GetValueAt[uint8](vec, idx)), nil
    34  	case types.T_uint16:
    35  		return int64(vector.GetValueAt[uint16](vec, idx)), nil
    36  	case types.T_uint32:
    37  		return int64(vector.GetValueAt[uint32](vec, idx)), nil
    38  	case types.T_uint64:
    39  		val := vector.GetValueAt[uint64](vec, idx)
    40  		if val > uint64(math.MaxInt64) {
    41  			return 0, moerr.NewInvalidArg(ctx, "function substring(str, start, lenth)", val)
    42  		}
    43  		return int64(val), nil
    44  	case types.T_int8:
    45  		return int64(vector.GetValueAt[int8](vec, idx)), nil
    46  	case types.T_int16:
    47  		return int64(vector.GetValueAt[int16](vec, idx)), nil
    48  	case types.T_int32:
    49  		return int64(vector.GetValueAt[int32](vec, idx)), nil
    50  	case types.T_int64:
    51  		return int64(vector.GetValueAt[int64](vec, idx)), nil
    52  	case types.T_float32:
    53  		return int64(vector.GetValueAt[float32](vec, idx)), nil
    54  	case types.T_float64:
    55  		val := vector.GetValueAt[float64](vec, idx)
    56  		if val > float64(math.MaxInt64) {
    57  			return 0, moerr.NewInvalidArg(ctx, "function substring(str, start, lenth)", val)
    58  		}
    59  		return int64(val), nil
    60  	default:
    61  		panic("castConstAsInt64 failed, unknown type")
    62  	}
    63  }
    64  
    65  func numSliceToI64[T types.BuiltinNumber](input []T) []int64 {
    66  	ret := make([]int64, len(input))
    67  	for i, v := range input {
    68  		ret[i] = int64(v)
    69  	}
    70  	return ret
    71  }
    72  
    73  func castTVecAsInt64(vec *vector.Vector) []int64 {
    74  	switch vec.GetType().Oid {
    75  	case types.T_uint8:
    76  		return numSliceToI64(vector.GetFixedVectorValues[uint8](vec))
    77  	case types.T_uint16:
    78  		return numSliceToI64(vector.GetFixedVectorValues[uint16](vec))
    79  	case types.T_uint32:
    80  		return numSliceToI64(vector.GetFixedVectorValues[uint32](vec))
    81  	case types.T_uint64:
    82  		return numSliceToI64(vector.GetFixedVectorValues[uint64](vec))
    83  	case types.T_int8:
    84  		return numSliceToI64(vector.GetFixedVectorValues[int8](vec))
    85  	case types.T_int16:
    86  		return numSliceToI64(vector.GetFixedVectorValues[int16](vec))
    87  	case types.T_int32:
    88  		return numSliceToI64(vector.GetFixedVectorValues[int32](vec))
    89  	case types.T_int64:
    90  		return numSliceToI64(vector.GetFixedVectorValues[int64](vec))
    91  	case types.T_float32:
    92  		return numSliceToI64(vector.GetFixedVectorValues[float32](vec))
    93  	case types.T_float64:
    94  		return numSliceToI64(vector.GetFixedVectorValues[float64](vec))
    95  	default:
    96  		panic("castTVecAsInt64 failed, unknown type")
    97  	}
    98  }
    99  
// XXX Unless I misread the code, substring simply does the following
   101  //				columnSrcCol := vector.MustStrCols(srcVector)
   102  //				columnStartCol := castTVecAsInt64(startVector)
   103  //				columnLengthCol := castTVecAsInt64(lengthVector)
   104  //				cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar(), inputVecs[2].IsScalar()}
   105  //				substring.SubstringDynamicOffsetBounded(columnSrcCol, results, columnStartCol, columnLengthCol, cs)
   106  //				return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp), nil
   107  // What are we doing here?
   108  
   109  func Substring(inputVecs []*vector.Vector, proc *process.Process) (*vector.Vector, error) {
   110  	// get the number of substr function parameters
   111  	var paramNum = len(inputVecs)
   112  	srcVector := inputVecs[0]
   113  	startVector := inputVecs[1]
   114  	// Substr function has no length parameter
   115  	if paramNum == 2 {
   116  		if srcVector.IsScalarNull() || startVector.IsScalarNull() {
   117  			return proc.AllocScalarNullVector(types.Type{Oid: types.T_char, Size: 24}), nil
   118  		}
   119  	} else { //Substring column with length parameter
   120  		lengthVector := inputVecs[2]
   121  		if srcVector.IsScalarNull() || startVector.IsScalarNull() || lengthVector.IsScalarNull() {
   122  			return proc.AllocScalarNullVector(types.Type{Oid: types.T_char, Size: 24}), nil
   123  		}
   124  	}
   125  	if srcVector.IsScalar() {
   126  		return substrSrcConst(inputVecs, proc)
   127  	} else {
   128  		return substrSrcCol(inputVecs, proc)
   129  	}
   130  }
   131  
   132  // substring first parameter is constant
   133  func substrSrcConst(inputVecs []*vector.Vector, proc *process.Process) (*vector.Vector, error) {
   134  	var paramNum = len(inputVecs)
   135  	srcVector := inputVecs[0]
   136  	startVector := inputVecs[1]
   137  
   138  	if startVector.IsScalarNull() {
   139  		return proc.AllocConstNullVector(srcVector.Typ, srcVector.Length()), nil
   140  	}
   141  
   142  	// XXX if this vector is const, then it is not expanded.  Really?
   143  	columnSrcCol := vector.MustStrCols(srcVector)
   144  
   145  	// request new memory space for result column
   146  	rows := calcResultVectorRows(inputVecs)
   147  	results := make([]string, rows)
   148  	resultNsp := nulls.NewWithSize(rows)
   149  
   150  	// set null row
   151  	if paramNum == 2 {
   152  		nulls.Or(inputVecs[0].Nsp, inputVecs[1].Nsp, resultNsp)
   153  	} else {
   154  		nulls.Or(inputVecs[0].Nsp, inputVecs[1].Nsp, resultNsp)
   155  		nulls.Or(inputVecs[2].Nsp, resultNsp, resultNsp)
   156  	}
   157  
   158  	if startVector.IsScalar() {
   159  		if paramNum == 2 {
   160  			// get start constant value
   161  			startValue, err := castConstAsInt64(proc.Ctx, startVector, 0)
   162  			if err != nil {
   163  				return nil, err
   164  			}
   165  			if startValue > 0 {
   166  				substring.SubstringFromLeftConstOffsetUnbounded(columnSrcCol, results, startValue-1)
   167  			} else if startValue < 0 {
   168  				substring.SubstringFromRightConstOffsetUnbounded(columnSrcCol, results, -startValue)
   169  			} else {
   170  				substring.SubstringFromZeroConstOffsetUnbounded(columnSrcCol, results)
   171  			}
   172  			return vector.NewConstString(srcVector.Typ, srcVector.Length(), results[0], proc.Mp()), nil
   173  		} else { //has third parameter
   174  			lengthVector := inputVecs[2]
   175  			if lengthVector.IsScalar() {
   176  				// get start constant value
   177  				startValue, err := castConstAsInt64(proc.Ctx, startVector, 0)
   178  				if err != nil {
   179  					return nil, err
   180  				}
   181  				// get length constant value
   182  				lengthValue, err := castConstAsInt64(proc.Ctx, lengthVector, 0)
   183  				if err != nil {
   184  					return nil, err
   185  				}
   186  
   187  				if startValue > 0 {
   188  					substring.SubstringFromLeftConstOffsetBounded(columnSrcCol, results, startValue-1, lengthValue)
   189  				} else if startValue < 0 {
   190  					substring.SubstringFromRightConstOffsetBounded(columnSrcCol, results, -startValue, lengthValue)
   191  				} else {
   192  					substring.SubstringFromZeroConstOffsetBounded(columnSrcCol, results)
   193  				}
   194  				return vector.NewConstString(srcVector.Typ, srcVector.Length(), results[0], proc.Mp()), nil
   195  			} else {
   196  				columnStartCol := castTVecAsInt64(startVector)
   197  				columnLengthCol := castTVecAsInt64(lengthVector)
   198  				cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar(), inputVecs[2].IsScalar()}
   199  				substring.SubstringDynamicOffsetBounded(columnSrcCol, results, columnStartCol, columnLengthCol, cs)
   200  				return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil
   201  			}
   202  		}
   203  	} else {
   204  		if paramNum == 2 {
   205  			//The pos column is a variable or an expression
   206  			columnStartCol := castTVecAsInt64(inputVecs[1])
   207  			cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar()}
   208  			substring.SubstringDynamicOffsetUnbounded(columnSrcCol, results, columnStartCol, cs)
   209  			return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil
   210  		} else {
   211  			//Substring column with length parameter
   212  			columnStartCol := castTVecAsInt64(inputVecs[1])
   213  			columnLengthCol := castTVecAsInt64(inputVecs[2])
   214  			cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar(), inputVecs[2].IsScalar()}
   215  			substring.SubstringDynamicOffsetBounded(columnSrcCol, results, columnStartCol, columnLengthCol, cs)
   216  			return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil
   217  		}
   218  	}
   219  }
   220  
   221  // substring first paramter is column
   222  func substrSrcCol(inputVecs []*vector.Vector, proc *process.Process) (*vector.Vector, error) {
   223  	var paramNum = len(inputVecs)
   224  	srcVector := inputVecs[0]
   225  	startVector := inputVecs[1]
   226  	columnSrcCol := vector.GetStrVectorValues(srcVector)
   227  
   228  	// request new memory space for result column
   229  	results := make([]string, len(columnSrcCol))
   230  
   231  	//set null row
   232  	resultNsp := nulls.NewWithSize(len(results))
   233  	if paramNum == 2 {
   234  		nulls.Or(inputVecs[0].Nsp, inputVecs[1].Nsp, resultNsp)
   235  	} else {
   236  		nulls.Or(inputVecs[0].Nsp, inputVecs[1].Nsp, resultNsp)
   237  		nulls.Or(inputVecs[2].Nsp, resultNsp, resultNsp)
   238  	}
   239  
   240  	if startVector.IsScalar() {
   241  		if paramNum == 2 {
   242  			// get start constant value
   243  			startValue, err := castConstAsInt64(proc.Ctx, startVector, 0)
   244  			if err != nil {
   245  				return nil, err
   246  			}
   247  			if startValue > 0 {
   248  				substring.SubstringFromLeftConstOffsetUnbounded(columnSrcCol, results, startValue-1)
   249  			} else if startValue < 0 {
   250  				substring.SubstringFromRightConstOffsetUnbounded(columnSrcCol, results, -startValue)
   251  			} else {
   252  				//startValue == 0
   253  				substring.SubstringFromZeroConstOffsetUnbounded(columnSrcCol, results)
   254  			}
   255  			return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil
   256  		} else { //has third parameter
   257  			lengthVector := inputVecs[2]
   258  			// if length parameter is constant
   259  			if lengthVector.IsScalar() {
   260  				// get start constant value
   261  				startValue, err := castConstAsInt64(proc.Ctx, startVector, 0)
   262  				if err != nil {
   263  					return nil, err
   264  				}
   265  				// get length constant value
   266  				lengthValue, err := castConstAsInt64(proc.Ctx, lengthVector, 0)
   267  				if err != nil {
   268  					return nil, err
   269  				}
   270  				if startValue > 0 {
   271  					substring.SubstringFromLeftConstOffsetBounded(columnSrcCol, results, startValue-1, lengthValue)
   272  				} else if startValue < 0 {
   273  					substring.SubstringFromRightConstOffsetBounded(columnSrcCol, results, -startValue, lengthValue)
   274  				} else {
   275  					//startValue == 0
   276  					substring.SubstringFromZeroConstOffsetBounded(columnSrcCol, results)
   277  				}
   278  				return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil
   279  			} else {
   280  				columnStartCol := castTVecAsInt64(inputVecs[1])
   281  				columnLengthCol := castTVecAsInt64(inputVecs[2])
   282  				cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar(), inputVecs[2].IsScalar()}
   283  				substring.SubstringDynamicOffsetBounded(columnSrcCol, results, columnStartCol, columnLengthCol, cs)
   284  				return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil
   285  			}
   286  		}
   287  	} else {
   288  		if paramNum == 2 {
   289  			//The pos column is a variable or an expression
   290  			columnStartCol := castTVecAsInt64(inputVecs[1])
   291  			cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar()}
   292  			substring.SubstringDynamicOffsetUnbounded(columnSrcCol, results, columnStartCol, cs)
   293  			return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil
   294  		} else {
   295  			columnStartCol := castTVecAsInt64(inputVecs[1])
   296  			columnLengthCol := castTVecAsInt64(inputVecs[2])
   297  			cs := []bool{inputVecs[0].IsScalar(), inputVecs[1].IsScalar(), inputVecs[2].IsScalar()}
   298  			substring.SubstringDynamicOffsetBounded(columnSrcCol, results, columnStartCol, columnLengthCol, cs)
   299  			return vector.NewWithStrings(srcVector.Typ, results, resultNsp, proc.Mp()), nil
   300  		}
   301  	}
   302  }
   303  
   304  // calcResultVectorRows : Calculate size of returned result rows, which is used to calculate the memory space required
   305  func calcResultVectorRows(inputVecs []*vector.Vector) int {
   306  	if len(inputVecs) == 2 {
   307  		if inputVecs[0].IsScalar() && inputVecs[1].IsScalar() {
   308  			return 1
   309  		} else {
   310  			return vector.Length(inputVecs[0])
   311  		}
   312  	} else {
   313  		if inputVecs[0].IsScalar() && inputVecs[1].IsScalar() && inputVecs[2].IsScalar() {
   314  			return 1
   315  		} else {
   316  			return vector.Length(inputVecs[0])
   317  		}
   318  	}
   319  }