github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/plan/function/builtin/multi/rpad.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package multi
    16  
    17  import (
    18  	"context"
    19  	"math"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    22  	"github.com/matrixorigin/matrixone/pkg/container/types"
    23  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    24  	"github.com/matrixorigin/matrixone/pkg/sql/plan/function/builtin/binary"
    25  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    26  )
    27  
    28  func Rpad(origVecs []*vector.Vector, proc *process.Process) (*vector.Vector, error) {
    29  	if origVecs[0].IsScalarNull() || origVecs[1].IsScalarNull() || origVecs[2].IsScalarNull() {
    30  		return proc.AllocScalarNullVector(origVecs[0].Typ), nil
    31  	}
    32  
    33  	isConst := []bool{origVecs[0].IsScalar(), origVecs[1].IsScalar(), origVecs[2].IsScalar()}
    34  
    35  	// gets all args
    36  	strs := vector.GetStrVectorValues(origVecs[0])
    37  	sizes := origVecs[1].Col
    38  	if _, ok := sizes.([]types.Varlena); ok {
    39  		sizes = vector.MustStrCols(origVecs[1])
    40  	}
    41  
    42  	var padstrs interface{}
    43  	// resolve padstrs,
    44  	if origVecs[2].GetType().IsVarlen() {
    45  		padstrs = vector.GetStrVectorValues(origVecs[2])
    46  	} else {
    47  		// keep orig type
    48  		padstrs = origVecs[2].Col
    49  	}
    50  	oriNsps := []*nulls.Nulls{origVecs[0].Nsp, origVecs[1].Nsp, origVecs[2].Nsp}
    51  
    52  	// gets a new vector to store our result
    53  	rowCount := vector.Length(origVecs[0])
    54  
    55  	if origVecs[0].IsScalar() && origVecs[1].IsScalar() && origVecs[2].IsScalar() {
    56  		//evaluate the result
    57  		result, nsp, err := rpad(proc.Ctx, rowCount, strs, sizes, padstrs, isConst, oriNsps)
    58  		if err != nil {
    59  			return nil, err
    60  		}
    61  		resultVec := vector.NewWithStrings(origVecs[0].Typ, result, nsp, proc.Mp())
    62  		return resultVec, nil
    63  	}
    64  
    65  	result, nsp, err := rpad(proc.Ctx, rowCount, strs, sizes, padstrs, isConst, oriNsps)
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  	resultVec := vector.NewWithStrings(origVecs[0].Typ, result, nsp, proc.Mp())
    70  	return resultVec, nil
    71  }
    72  
    73  var (
    74  	MaxPad int64
    75  )
    76  
    77  const UINT16_MAX = ^uint16(0)
    78  
    79  func init() {
    80  	MaxPad = int64(16 * 1024 * 1024)
    81  }
    82  
    83  // rpad returns a *types.Bytes containing the padded strings and a corresponding bitmap *nulls.Nulls.
    84  // rpad is multibyte-safe
    85  func rpad(ctx context.Context, rowCount int, strs []string, sizes interface{}, pads interface{}, isConst []bool, oriNsp []*nulls.Nulls) ([]string, *nulls.Nulls, error) {
    86  	// typecast
    87  	var padstrs []string
    88  	var err error
    89  	switch pd := pads.(type) {
    90  	case []string:
    91  		padstrs = pd
    92  	case []int64:
    93  		padstrs = make([]string, len(pd))
    94  		_, err = binary.Int64ToBytes(pd, padstrs)
    95  	case []int32:
    96  		padstrs = make([]string, len(pd))
    97  		_, err = binary.Int32ToBytes(pd, padstrs)
    98  	case []int16:
    99  		padstrs = make([]string, len(pd))
   100  		_, err = binary.Int16ToBytes(pd, padstrs)
   101  	case []int8:
   102  		padstrs = make([]string, len(pd))
   103  		_, err = binary.Int8ToBytes(pd, padstrs)
   104  	case []uint64:
   105  		padstrs = make([]string, len(pd))
   106  		_, err = binary.Uint64ToBytes(pd, padstrs)
   107  	case []uint32:
   108  		padstrs = make([]string, len(pd))
   109  		_, err = binary.Uint32ToBytes(pd, padstrs)
   110  	case []uint16:
   111  		padstrs = make([]string, len(pd))
   112  		_, err = binary.Uint16ToBytes(pd, padstrs)
   113  	case []uint8:
   114  		padstrs = make([]string, len(pd))
   115  		_, err = binary.Uint8ToBytes(pd, padstrs)
   116  	case []float32:
   117  		padstrs = make([]string, len(pd))
   118  		_, err = binary.Float32ToBytes(pd, padstrs)
   119  	case []float64:
   120  		padstrs = make([]string, len(pd))
   121  		_, err = binary.Float64ToBytes(pd, padstrs)
   122  	default:
   123  		// empty string
   124  		padstrs = append(padstrs, "")
   125  		isConst[2] = true
   126  	}
   127  	if err != nil {
   128  		return nil, nil, err
   129  	}
   130  
   131  	// do rpad
   132  	var result []string
   133  	var nsp *nulls.Nulls
   134  	var err2 error
   135  	switch sz := sizes.(type) {
   136  	case []int64:
   137  		result, nsp = rpadInt64(rowCount, strs, sz, padstrs, isConst, oriNsp)
   138  	case []int32:
   139  		sizesInt64 := make([]int64, len(sz))
   140  		sizesInt64, err2 = binary.Int32ToInt64(ctx, sz, sizesInt64)
   141  		result, nsp = rpadInt64(rowCount, strs, sizesInt64, padstrs, isConst, oriNsp)
   142  	case []int16:
   143  		sizesInt64 := make([]int64, len(sz))
   144  		sizesInt64, err2 = binary.Int16ToInt64(ctx, sz, sizesInt64)
   145  		result, nsp = rpadInt64(rowCount, strs, sizesInt64, padstrs, isConst, oriNsp)
   146  	case []int8:
   147  		sizesInt64 := make([]int64, len(sz))
   148  		sizesInt64, err2 = binary.Int8ToInt64(ctx, sz, sizesInt64)
   149  		result, nsp = rpadInt64(rowCount, strs, sizesInt64, padstrs, isConst, oriNsp)
   150  	case []float64:
   151  		sizesInt64 := make([]int64, len(sz))
   152  		isEmptyStringOrNull := make([]int, len(sz))
   153  		sizesInt64, err2 = binary.Float64ToInt64(ctx, sz, sizesInt64, isEmptyStringOrNull)
   154  		result, nsp = rpadInt64(rowCount, strs, sizesInt64, padstrs, isConst, oriNsp, isEmptyStringOrNull)
   155  	case []float32:
   156  		sizesInt64 := make([]int64, len(sz))
   157  		sizesInt64, err2 = binary.Float32ToInt64(ctx, sz, sizesInt64)
   158  		result, nsp = rpadInt64(rowCount, strs, sizesInt64, padstrs, isConst, oriNsp)
   159  	case []uint64:
   160  		result, nsp = rpadUint64(rowCount, strs, sz, padstrs, isConst, oriNsp)
   161  	case []uint32:
   162  		sizesUint64 := make([]uint64, len(sz))
   163  		sizesUint64, err2 = binary.Uint32ToUint64(ctx, sz, sizesUint64)
   164  		result, nsp = rpadUint64(rowCount, strs, sizesUint64, padstrs, isConst, oriNsp)
   165  	case []uint16:
   166  		sizesUint64 := make([]uint64, len(sz))
   167  		sizesUint64, err2 = binary.Uint16ToUint64(ctx, sz, sizesUint64)
   168  		result, nsp = rpadUint64(rowCount, strs, sizesUint64, padstrs, isConst, oriNsp)
   169  	case []uint8:
   170  		sizesUint64 := make([]uint64, len(sz))
   171  		sizesUint64, err2 = binary.Uint8ToUint64(ctx, sz, sizesUint64)
   172  		result, nsp = rpadUint64(rowCount, strs, sizesUint64, padstrs, isConst, oriNsp)
   173  	case []string:
   174  		// XXX What is this code?
   175  		sizesFloat64 := make([]float64, len(sz))
   176  		isEmptyStringOrNull := make([]int, len(sz))
   177  		sizesFloat64, err2 = binary.BytesToFloat(ctx, sz, sizesFloat64, false, isEmptyStringOrNull)
   178  		sizesInt64 := make([]int64, len(sz))
   179  		for i, val := range sizesFloat64 { //for func rpad,like '1.8', is 1, not 2.
   180  			sizesInt64[i] = int64(math.Floor(val))
   181  		}
   182  		result, nsp = rpadInt64(rowCount, strs, sizesInt64, padstrs, isConst, oriNsp, isEmptyStringOrNull)
   183  	default:
   184  		// return empty strings if sizes is a non-numerical type slice
   185  		nsp = new(nulls.Nulls)
   186  		nulls.Set(nsp, oriNsp[0])
   187  		result = make([]string, len(strs))
   188  	}
   189  	if err2 != nil {
   190  		return nil, nil, err2
   191  	}
   192  	return result, nsp, nil
   193  }
   194  
   195  // note that: for flag:
   196  // 0: nothing todo
   197  // 1: is an overflow flag
   198  // 2: is an parse_error flag
   199  func rpadInt64(rowCount int, strs []string, sizes []int64, padstrs []string, isConst []bool, oriNsp []*nulls.Nulls, isEmptyStringOrNull ...[]int) ([]string, *nulls.Nulls) {
   200  	results := make([]string, rowCount)
   201  	resultNsp := new(nulls.Nulls)
   202  	usedEmptyStringOrNull := len(isEmptyStringOrNull) > 0
   203  	for i := 0; i < rowCount; i++ {
   204  		var newSize int64
   205  		var EmptyStringOrNull int //we use flag1 to see if we need to give "" but not NULL
   206  		if isConst[1] {
   207  			if usedEmptyStringOrNull {
   208  				EmptyStringOrNull = isEmptyStringOrNull[0][0]
   209  			}
   210  			// accepts a constant literal
   211  			newSize = sizes[0]
   212  		} else {
   213  			if usedEmptyStringOrNull {
   214  				EmptyStringOrNull = isEmptyStringOrNull[0][i]
   215  			}
   216  			// accepts an attribute name
   217  			newSize = sizes[i]
   218  		}
   219  		if EmptyStringOrNull == 2 {
   220  			continue
   221  		}
   222  		// gets NULL if any arg is NULL or the newSize < 0
   223  		if row := uint64(i); nulls.Contains(oriNsp[0], row) || nulls.Contains(oriNsp[1], row) || nulls.Contains(oriNsp[2], row) || newSize < 0 || newSize > int64(UINT16_MAX) || newSize > MaxPad {
   224  			nulls.Add(resultNsp, row)
   225  			continue
   226  		}
   227  
   228  		var padRunes []rune
   229  		if isConst[2] {
   230  			padRunes = []rune(padstrs[0])
   231  		} else {
   232  			padRunes = []rune(padstrs[i])
   233  		}
   234  		var oriRunes []rune
   235  		if isConst[0] {
   236  			oriRunes = []rune(strs[0])
   237  		} else {
   238  			oriRunes = []rune(strs[i])
   239  		}
   240  		// gets the padded string
   241  		if int(newSize) <= len(oriRunes) {
   242  			// truncates the original string
   243  			tmp := string(oriRunes[:newSize])
   244  			results[i] = tmp
   245  		} else {
   246  			if len(padRunes) == 0 {
   247  				// gets an empty string if the padRunes is also an empty string and newSize > len(oriRunes)
   248  				// E.x. in mysql 8.0
   249  				// select rpad("test",5,"");
   250  				// +-----------------+
   251  				// |rpad("test",5,"")|
   252  				// +-----------------+
   253  				// |                 |
   254  				// +-----------------+
   255  				// results[i] is still empty
   256  			} else {
   257  				padding := int(newSize) - len(oriRunes)
   258  				// builds a padded string
   259  				var tmp string
   260  				if isConst[0] {
   261  					tmp += strs[0]
   262  				} else {
   263  					tmp += strs[i]
   264  				}
   265  				// adds some pads
   266  				for j := 0; j < padding/len(padRunes); j++ {
   267  					tmp += string(padRunes)
   268  				}
   269  				// adds the remaining part
   270  				tmp += string(padRunes[:padding%len(padRunes)])
   271  				results[i] = tmp
   272  			}
   273  		}
   274  	}
   275  	return results, resultNsp
   276  }
   277  
   278  func rpadUint64(rowCount int, strs []string, sizes []uint64, padstrs []string, isConst []bool, oriNsp []*nulls.Nulls) ([]string, *nulls.Nulls) {
   279  	isz := make([]int64, len(sizes))
   280  	for i, s := range sizes {
   281  		isz[i] = int64(s)
   282  	}
   283  	return rpadInt64(rowCount, strs, isz, padstrs, isConst, oriNsp)
   284  }