github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/mergesort/mergesort.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mergesort
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort/uuids"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/container/types"
    23  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    24  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort/bools"
    25  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort/decimal128s"
    26  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort/decimal64s"
    27  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort/numerics"
    28  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort/rowid"
    29  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort/txnts"
    30  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/mergesort/varchar"
    31  )
    32  
    33  func SortBlockColumns(cols []containers.Vector, pk int) ([]uint32, error) {
    34  	sortedIdx := make([]uint32, cols[pk].Length())
    35  
    36  	switch cols[pk].GetType().Oid {
    37  	case types.T_bool:
    38  		bools.Sort(cols[pk], sortedIdx)
    39  	case types.T_int8:
    40  		numerics.Sort[int8](cols[pk], sortedIdx)
    41  	case types.T_int16:
    42  		numerics.Sort[int16](cols[pk], sortedIdx)
    43  	case types.T_int32:
    44  		numerics.Sort[int32](cols[pk], sortedIdx)
    45  	case types.T_int64:
    46  		numerics.Sort[int64](cols[pk], sortedIdx)
    47  	case types.T_uint8:
    48  		numerics.Sort[uint8](cols[pk], sortedIdx)
    49  	case types.T_uint16:
    50  		numerics.Sort[uint16](cols[pk], sortedIdx)
    51  	case types.T_uint32:
    52  		numerics.Sort[uint32](cols[pk], sortedIdx)
    53  	case types.T_uint64:
    54  		numerics.Sort[uint64](cols[pk], sortedIdx)
    55  	case types.T_float32:
    56  		numerics.Sort[float32](cols[pk], sortedIdx)
    57  	case types.T_float64:
    58  		numerics.Sort[float64](cols[pk], sortedIdx)
    59  	case types.T_date:
    60  		numerics.Sort[types.Date](cols[pk], sortedIdx)
    61  	case types.T_time:
    62  		numerics.Sort[types.Time](cols[pk], sortedIdx)
    63  	case types.T_datetime:
    64  		numerics.Sort[types.Datetime](cols[pk], sortedIdx)
    65  	case types.T_decimal64:
    66  		decimal64s.Sort(cols[pk], sortedIdx)
    67  	case types.T_decimal128:
    68  		decimal128s.Sort(cols[pk], sortedIdx)
    69  	case types.T_timestamp:
    70  		numerics.Sort[types.Timestamp](cols[pk], sortedIdx)
    71  	case types.T_uuid:
    72  		uuids.Sort(cols[pk], sortedIdx)
    73  	case types.T_TS:
    74  		txnts.Sort(cols[pk], sortedIdx)
    75  	case types.T_Rowid:
    76  		rowid.Sort(cols[pk], sortedIdx)
    77  	case types.T_char, types.T_json, types.T_varchar, types.T_blob, types.T_text:
    78  		varchar.Sort(cols[pk], sortedIdx)
    79  	default:
    80  		panic(fmt.Sprintf("%s not supported", cols[pk].GetType().String()))
    81  	}
    82  
    83  	for i := 0; i < len(cols); i++ {
    84  		if i == pk {
    85  			continue
    86  		}
    87  		cols[i] = Shuffle(cols[i], sortedIdx)
    88  	}
    89  	return sortedIdx, nil
    90  }
    91  
    92  func MergeSortedColumn(column []containers.Vector, sortedIdx *[]uint32, fromLayout, toLayout []uint32) (ret []containers.Vector, mapping []uint32) {
    93  	switch column[0].GetType().Oid {
    94  	case types.T_bool:
    95  		ret, mapping = bools.Merge(column, sortedIdx, fromLayout, toLayout)
    96  	case types.T_int8:
    97  		ret, mapping = numerics.Merge[int8](column, sortedIdx, fromLayout, toLayout)
    98  	case types.T_int16:
    99  		ret, mapping = numerics.Merge[int16](column, sortedIdx, fromLayout, toLayout)
   100  	case types.T_int32:
   101  		ret, mapping = numerics.Merge[int32](column, sortedIdx, fromLayout, toLayout)
   102  	case types.T_int64:
   103  		ret, mapping = numerics.Merge[int64](column, sortedIdx, fromLayout, toLayout)
   104  	case types.T_uint8:
   105  		ret, mapping = numerics.Merge[uint8](column, sortedIdx, fromLayout, toLayout)
   106  	case types.T_uint16:
   107  		ret, mapping = numerics.Merge[uint16](column, sortedIdx, fromLayout, toLayout)
   108  	case types.T_uint32:
   109  		ret, mapping = numerics.Merge[uint32](column, sortedIdx, fromLayout, toLayout)
   110  	case types.T_uint64:
   111  		ret, mapping = numerics.Merge[uint64](column, sortedIdx, fromLayout, toLayout)
   112  	case types.T_float32:
   113  		ret, mapping = numerics.Merge[float32](column, sortedIdx, fromLayout, toLayout)
   114  	case types.T_float64:
   115  		ret, mapping = numerics.Merge[float64](column, sortedIdx, fromLayout, toLayout)
   116  	case types.T_date:
   117  		ret, mapping = numerics.Merge[types.Date](column, sortedIdx, fromLayout, toLayout)
   118  	case types.T_time:
   119  		ret, mapping = numerics.Merge[types.Time](column, sortedIdx, fromLayout, toLayout)
   120  	case types.T_datetime:
   121  		ret, mapping = numerics.Merge[types.Datetime](column, sortedIdx, fromLayout, toLayout)
   122  	case types.T_decimal64:
   123  		ret, mapping = decimal64s.Merge(column, sortedIdx, fromLayout, toLayout)
   124  	case types.T_decimal128:
   125  		ret, mapping = decimal128s.Merge(column, sortedIdx, fromLayout, toLayout)
   126  	case types.T_uuid:
   127  		ret, mapping = uuids.Merge(column, sortedIdx, fromLayout, toLayout)
   128  	case types.T_timestamp:
   129  		ret, mapping = numerics.Merge[types.Timestamp](column, sortedIdx, fromLayout, toLayout)
   130  	case types.T_TS:
   131  		ret, mapping = txnts.Merge(column, sortedIdx, fromLayout, toLayout)
   132  	case types.T_Rowid:
   133  		ret, mapping = rowid.Merge(column, sortedIdx, fromLayout, toLayout)
   134  	case types.T_char, types.T_json, types.T_varchar, types.T_blob, types.T_text:
   135  		ret, mapping = varchar.Merge(column, sortedIdx, fromLayout, toLayout)
   136  	default:
   137  		panic(fmt.Sprintf("%s not supported", column[0].GetType().String()))
   138  	}
   139  	return
   140  }
   141  
   142  func Reshape(column []containers.Vector, fromLayout, toLayout []uint32) (ret []containers.Vector) {
   143  	ret = make([]containers.Vector, len(toLayout))
   144  	fromIdx := 0
   145  	fromOffset := 0
   146  	for i := 0; i < len(toLayout); i++ {
   147  		ret[i] = containers.MakeVector(column[0].GetType(), column[0].Nullable())
   148  		toOffset := 0
   149  		for toOffset < int(toLayout[i]) {
   150  			fromLeft := fromLayout[fromIdx] - uint32(fromOffset)
   151  			if fromLeft == 0 {
   152  				fromIdx++
   153  				fromOffset = 0
   154  				fromLeft = fromLayout[fromIdx]
   155  			}
   156  			length := 0
   157  			if fromLeft < toLayout[i]-uint32(toOffset) {
   158  				length = int(fromLeft)
   159  			} else {
   160  				length = int(toLayout[i]) - toOffset
   161  			}
   162  			cloned := column[fromIdx].CloneWindow(fromOffset, length)
   163  			defer cloned.Close()
   164  			ret[i].Extend(cloned)
   165  			fromOffset += length
   166  			toOffset += length
   167  		}
   168  	}
   169  	for _, v := range column {
   170  		v.Close()
   171  	}
   172  	return
   173  }
   174  
   175  func ShuffleColumn(column []containers.Vector, sortedIdx []uint32, fromLayout, toLayout []uint32) (ret []containers.Vector) {
   176  	ret = Multiplex(column, sortedIdx, fromLayout, toLayout)
   177  	return
   178  }
   179  
   180  //func MergeBlocksToSegment(blks []*batch.Batch, pk int) error {
   181  //	n := len(blks) * blks[0].Vecs[pk].Length()
   182  //	mergedSrc := make([]uint16, n)
   183  //
   184  //	col := make([]*vector.Vector, len(blks))
   185  //	for i := 0; i < len(blks); i++ {
   186  //		col[i] = blks[i].Vecs[pk]
   187  //	}
   188  //
   189  //	switch blks[0].Vecs[pk].Typ.Oid {
   190  //	case types.T_int8:
   191  //		int8s.Merge(col, mergedSrc)
   192  //	case types.T_int16:
   193  //		int16s.Merge(col, mergedSrc)
   194  //	case types.T_int32:
   195  //		int32s.Merge(col, mergedSrc)
   196  //	case types.T_int64:
   197  //		int64s.Merge(col, mergedSrc)
   198  //	case types.T_uint8:
   199  //		uint8s.Merge(col, mergedSrc)
   200  //	case types.T_uint16:
   201  //		uint16s.Merge(col, mergedSrc)
   202  //	case types.T_uint32:
   203  //		uint32s.Merge(col, mergedSrc)
   204  //	case types.T_uint64:
   205  //		uint64s.Merge(col, mergedSrc)
   206  //	case types.T_float32:
   207  //		float32s.Merge(col, mergedSrc)
   208  //	case types.T_float64:
   209  //		float64s.Merge(col, mergedSrc)
   210  //	case types.T_date:
   211  //		dates.Merge(col, mergedSrc)
   212  //	case types.T_datetime:
   213  //		datetimes.Merge(col, mergedSrc)
   214  //	case types.T_char, types.T_json, types.T_varchar, types.T_blob:
   215  //		varchar.Merge(col, mergedSrc)
   216  //	}
   217  //
   218  //	for j := 0; j < len(blks[0].Vecs); j++ {
   219  //		if j == pk {
   220  //			continue
   221  //		}
   222  //		for i := 0; i < len(blks); i++ {
   223  //			col[i] = blks[i].Vecs[j]
   224  //		}
   225  //
   226  //		switch blks[0].Vecs[j].Typ.Oid {
   227  //		case types.T_int8:
   228  //			int8s.Multiplex(col, mergedSrc)
   229  //		case types.T_int16:
   230  //			int16s.Multiplex(col, mergedSrc)
   231  //		case types.T_int32:
   232  //			int32s.Multiplex(col, mergedSrc)
   233  //		case types.T_int64:
   234  //			int64s.Multiplex(col, mergedSrc)
   235  //		case types.T_uint8:
   236  //			uint8s.Multiplex(col, mergedSrc)
   237  //		case types.T_uint16:
   238  //			uint16s.Multiplex(col, mergedSrc)
   239  //		case types.T_uint32:
   240  //			uint32s.Multiplex(col, mergedSrc)
   241  //		case types.T_uint64:
   242  //			uint64s.Multiplex(col, mergedSrc)
   243  //		case types.T_float32:
   244  //			float32s.Multiplex(col, mergedSrc)
   245  //		case types.T_float64:
   246  //			float64s.Multiplex(col, mergedSrc)
   247  //		case types.T_date:
   248  //			dates.Multiplex(col, mergedSrc)
   249  //		case types.T_datetime:
   250  //			datetimes.Multiplex(col, mergedSrc)
   251  //		case types.T_char, types.T_json, types.T_varchar, types.T_blob:
   252  //			varchar.Multiplex(col, mergedSrc)
   253  //		}
   254  //	}
   255  //
   256  //	return nil
   257  //}