github.com/matrixorigin/matrixone@v0.7.0/pkg/vectorize/substring/substring.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package substring
    16  
    17  import (
    18  	"math"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/container/types"
    21  )
    22  
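/*
Substring function rules, as implemented in this file:

  - Positions and lengths are counted in runes (Unicode code points), not bytes.
  - The start position handled by the Dynamic* functions is 1-based: a positive
    position counts from the left (1 is the first rune) and a negative position
    counts from the right (-1 is the last rune).
  - A start position of 0 yields the empty string.
  - A start position that lies outside the string yields the empty string.
  - When a length is given, a length of zero or less yields the empty string,
    and a length that runs past the end of the string is cut off at the end.
*/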
    26  
    27  // Slice from left to right, starting from 0
    28  func getSliceFromLeft(s string, offset int64) string {
    29  	sourceRune := []rune(s)
    30  	elemsize := int64(len(sourceRune))
    31  	if offset > elemsize {
    32  		return ""
    33  	}
    34  	substrRune := sourceRune[offset:]
    35  	return string(substrRune)
    36  }
    37  
    38  func getSliceOffsetLen(s string, offset int64, length int64) string {
    39  	sourceRune := []rune(s)
    40  	elemsize := int64(len(sourceRune))
    41  	if offset < 0 {
    42  		offset += elemsize
    43  		if offset < 0 {
    44  			return ""
    45  		}
    46  	}
    47  	if offset >= elemsize {
    48  		return ""
    49  	}
    50  
    51  	if length <= 0 {
    52  		return ""
    53  	} else {
    54  		end := offset + length
    55  		if end > elemsize {
    56  			end = elemsize
    57  		}
    58  		substrRune := sourceRune[offset:end]
    59  		return string(substrRune)
    60  	}
    61  }
    62  
    63  // Cut the slice with length from left to right, starting from 0
    64  func getSliceFromLeftWithLength(s string, offset int64, length int64) string {
    65  	if offset < 0 {
    66  		return ""
    67  	}
    68  	return getSliceOffsetLen(s, offset, length)
    69  }
    70  
    71  // From right to left, cut the slice with length from 1
    72  func getSliceFromRightWithLength(s string, offset int64, length int64) string {
    73  	return getSliceOffsetLen(s, -offset, length)
    74  }
    75  
    76  // Cut slices from right to left, starting from 1
    77  func getSliceFromRight(s string, offset int64) string {
    78  	sourceRune := []rune(s)
    79  	elemsize := int64(len(sourceRune))
    80  	if offset > elemsize {
    81  		return ""
    82  	}
    83  	substrRune := sourceRune[elemsize-offset:]
    84  	return string(substrRune)
    85  }
    86  
    87  // The length parameter is not bound. Cut the string from the left
    88  func SubstringFromLeftConstOffsetUnbounded(src []string, res []string, start int64) []string {
    89  	for idx, s := range src {
    90  		res[idx] = getSliceFromLeft(s, start)
    91  	}
    92  	return res
    93  }
    94  
    95  // The length parameter is not bound. Cut the string from the right
    96  func SubstringFromRightConstOffsetUnbounded(src []string, res []string, start int64) []string {
    97  	for idx, s := range src {
    98  		res[idx] = getSliceFromRight(s, start)
    99  	}
   100  	return res
   101  }
   102  
   103  // Per MySQL substring doc, if pos is 0, return empty strings.
   104  func SubstringFromZeroConstOffsetUnbounded(src []string, res []string) []string {
   105  	for idx := range src {
   106  		res[idx] = ""
   107  	}
   108  	return res
   109  }
   110  
   111  // Per MySQL substring doc, if pos is 0, return empty strings.
   112  func SubstringFromZeroConstOffsetBounded(src []string, res []string) []string {
   113  	for idx := range src {
   114  		res[idx] = ""
   115  	}
   116  	return res
   117  }
   118  
   119  // Without binding the length parameter, dynamically cut the string
   120  func SubstringDynamicOffsetUnbounded[T types.BuiltinNumber](src []string, res []string, startColumn []T, cs []bool) []string {
   121  	for idx := range src {
   122  		var s string
   123  		if cs[0] {
   124  			s = src[0]
   125  		} else {
   126  			s = src[idx]
   127  		}
   128  
   129  		var startValue int64
   130  		if cs[1] {
   131  			startValue = int64(startColumn[0])
   132  		} else {
   133  			startValue = int64(startColumn[idx])
   134  		}
   135  
   136  		if startValue > math.MaxInt32 || startValue < math.MinInt32 {
   137  			// XXX better error handling
   138  			panic("substring index out of range")
   139  		}
   140  
   141  		if startValue > 0 {
   142  			res[idx] = getSliceFromLeft(s, startValue-1)
   143  		} else if startValue < 0 {
   144  			res[idx] = getSliceFromRight(s, -startValue)
   145  		} else {
   146  			// MySQL: pos 0 return empty string
   147  			res[idx] = ""
   148  		}
   149  	}
   150  	return res
   151  }
   152  
   153  // bound length parameter. Cut the string from left
   154  func SubstringFromLeftConstOffsetBounded(src []string, res []string, start int64, length int64) []string {
   155  	for idx, s := range src {
   156  		res[idx] = getSliceFromLeftWithLength(s, start, length)
   157  	}
   158  	return res
   159  }
   160  
   161  // bound length parameter. Cut the string from right
   162  func SubstringFromRightConstOffsetBounded(src []string, res []string, start, length int64) []string {
   163  	for idx, s := range src {
   164  		res[idx] = getSliceFromRightWithLength(s, start, length)
   165  	}
   166  	return res
   167  }
   168  
   169  // bound the length parameter, dynamically cut the string
   170  func SubstringDynamicOffsetBounded[T1, T2 types.BuiltinNumber](src []string, res []string, startColumn []T1, lengthColumn []T2, cs []bool) []string {
   171  	for idx, s := range src {
   172  		//get substring pos parameter value
   173  		var startValue, lengthValue int64
   174  
   175  		if cs[1] {
   176  			startValue = int64(startColumn[0])
   177  		} else {
   178  			startValue = int64(startColumn[idx])
   179  		}
   180  		if startValue > math.MaxInt32 || startValue < math.MinInt32 {
   181  			panic("substring start value out of bound")
   182  		}
   183  
   184  		if cs[2] {
   185  			lengthValue = int64(lengthColumn[0])
   186  		} else {
   187  			lengthValue = int64(lengthColumn[idx])
   188  		}
   189  		if lengthValue > math.MaxInt32 || lengthValue < math.MinInt32 {
   190  			panic("substring length value out of bound")
   191  		}
   192  
   193  		if startValue > 0 {
   194  			res[idx] = getSliceFromLeftWithLength(s, startValue-1, lengthValue)
   195  		} else if startValue < 0 {
   196  			res[idx] = getSliceFromRightWithLength(s, -startValue, lengthValue)
   197  		} else {
   198  			res[idx] = ""
   199  		}
   200  	}
   201  	return res
   202  }