github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/memoryengine/shard_hash.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package memoryengine
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"hash/fnv"
    21  	"sort"
    22  	"unsafe"
    23  
    24  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    25  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    26  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    27  	"github.com/matrixorigin/matrixone/pkg/container/types"
    28  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    29  	logservicepb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    30  	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
    31  	"github.com/matrixorigin/matrixone/pkg/vm/engine"
    32  )
    33  
// HashShard is a ShardPolicy implementation that routes rows to shards by
// hashing their primary key bytes.
type HashShard struct {
	// mp is the memory pool handed to vector appends when Vector builds the
	// per-shard output vectors.
	mp *mpool.MPool
}
    37  
    38  func NewHashShard(mp *mpool.MPool) *HashShard {
    39  	return &HashShard{
    40  		mp: mp,
    41  	}
    42  }
    43  
    44  func (*HashShard) Batch(
    45  	ctx context.Context,
    46  	tableID ID,
    47  	getDefs getDefsFunc,
    48  	bat *batch.Batch,
    49  	nodes []logservicepb.DNStore,
    50  ) (
    51  	sharded []*ShardedBatch,
    52  	err error,
    53  ) {
    54  
    55  	// get defs
    56  	defs, err := getDefs(ctx)
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  
    61  	// get shard key
    62  	var primaryAttrs []engine.Attribute
    63  	for _, def := range defs {
    64  		attr, ok := def.(*engine.AttributeDef)
    65  		if !ok {
    66  			continue
    67  		}
    68  		if attr.Attr.Primary {
    69  			primaryAttrs = append(primaryAttrs, attr.Attr)
    70  		}
    71  	}
    72  	sort.Slice(primaryAttrs, func(i, j int) bool {
    73  		return primaryAttrs[i].Name < primaryAttrs[j].Name
    74  	})
    75  	if len(primaryAttrs) == 0 {
    76  		// no shard key
    77  		return nil, nil
    78  	}
    79  	type keyInfo struct {
    80  		Attr  engine.Attribute
    81  		Index int
    82  	}
    83  	var infos []keyInfo
    84  	for _, attr := range primaryAttrs {
    85  		for i, name := range bat.Attrs {
    86  			if name == attr.Name {
    87  				infos = append(infos, keyInfo{
    88  					Attr:  attr,
    89  					Index: i,
    90  				})
    91  			}
    92  		}
    93  	}
    94  
    95  	// shards
    96  	var shards []*Shard
    97  	for _, store := range nodes {
    98  		for _, info := range store.Shards {
    99  			shards = append(shards, &Shard{
   100  				DNShardRecord: metadata.DNShardRecord{
   101  					ShardID: info.ShardID,
   102  				},
   103  				ReplicaID: info.ReplicaID,
   104  				Address:   store.ServiceAddress,
   105  			})
   106  		}
   107  	}
   108  	sort.Slice(shards, func(i, j int) bool {
   109  		return shards[i].ShardID < shards[j].ShardID
   110  	})
   111  	m := make(map[*Shard]*batch.Batch)
   112  	for _, shard := range shards {
   113  		batchCopy := *bat
   114  		for i := range batchCopy.Zs {
   115  			batchCopy.Zs[i] = 0
   116  		}
   117  		m[shard] = &batchCopy
   118  	}
   119  
   120  	// shard batch
   121  	for i := 0; i < bat.Length(); i++ {
   122  		hasher := fnv.New32()
   123  		for _, info := range infos {
   124  			vec := bat.Vecs[info.Index]
   125  			bs, err := getBytesFromPrimaryVectorForHash(ctx, vec, i, info.Attr.Type)
   126  			if err != nil {
   127  				return nil, err
   128  			}
   129  			_, err = hasher.Write(bs)
   130  			if err != nil {
   131  				panic(err)
   132  			}
   133  		}
   134  		n := int(hasher.Sum32())
   135  		shard := shards[n%len(shards)]
   136  		m[shard].Zs[i] = 1
   137  	}
   138  
   139  	for shard, bat := range m {
   140  		isEmpty := true
   141  		for _, i := range bat.Zs {
   142  			if i > 0 {
   143  				isEmpty = false
   144  				break
   145  			}
   146  		}
   147  		if isEmpty {
   148  			continue
   149  		}
   150  		sharded = append(sharded, &ShardedBatch{
   151  			Shard: *shard,
   152  			Batch: bat,
   153  		})
   154  	}
   155  
   156  	return
   157  }
   158  
   159  func (h *HashShard) Vector(
   160  	ctx context.Context,
   161  	tableID ID,
   162  	getDefs getDefsFunc,
   163  	colName string,
   164  	vec *vector.Vector,
   165  	nodes []logservicepb.DNStore,
   166  ) (
   167  	sharded []*ShardedVector,
   168  	err error,
   169  ) {
   170  
   171  	//TODO use vector nulls mask
   172  
   173  	// get defs
   174  	defs, err := getDefs(ctx)
   175  	if err != nil {
   176  		return nil, err
   177  	}
   178  
   179  	// get shard key
   180  	var shardAttr *engine.Attribute
   181  	for _, def := range defs {
   182  		attr, ok := def.(*engine.AttributeDef)
   183  		if !ok {
   184  			continue
   185  		}
   186  		if attr.Attr.Primary {
   187  			if attr.Attr.Name == colName {
   188  				shardAttr = &attr.Attr
   189  				break
   190  			}
   191  		}
   192  	}
   193  	if shardAttr == nil {
   194  		// no shard key
   195  		return nil, nil
   196  	}
   197  
   198  	// shards
   199  	var shards []*Shard
   200  	for _, store := range nodes {
   201  		for _, info := range store.Shards {
   202  			shards = append(shards, &Shard{
   203  				DNShardRecord: metadata.DNShardRecord{
   204  					ShardID: info.ShardID,
   205  				},
   206  				ReplicaID: info.ReplicaID,
   207  				Address:   store.ServiceAddress,
   208  			})
   209  		}
   210  	}
   211  	sort.Slice(shards, func(i, j int) bool {
   212  		return shards[i].ShardID < shards[j].ShardID
   213  	})
   214  	m := make(map[*Shard]*vector.Vector)
   215  
   216  	// shard vector
   217  	for i := 0; i < vec.Length(); i++ {
   218  		hasher := fnv.New32()
   219  		bs, err := getBytesFromPrimaryVectorForHash(ctx, vec, i, shardAttr.Type)
   220  		if err != nil {
   221  			return nil, err
   222  		}
   223  		_, err = hasher.Write(bs)
   224  		if err != nil {
   225  			panic(err)
   226  		}
   227  		n := int(hasher.Sum32())
   228  		shard := shards[n%len(shards)]
   229  		shardVec, ok := m[shard]
   230  		if !ok {
   231  			shardVec = vector.New(shardAttr.Type)
   232  			m[shard] = shardVec
   233  		}
   234  		v := getNullableValueFromVector(vec, i)
   235  		appendNullableValueToVector(shardVec, v, h.mp)
   236  	}
   237  
   238  	for shard, vec := range m {
   239  		if vec.Length() == 0 {
   240  			continue
   241  		}
   242  		sharded = append(sharded, &ShardedVector{
   243  			Shard:  *shard,
   244  			Vector: vec,
   245  		})
   246  	}
   247  
   248  	return
   249  }
   250  
   251  var _ ShardPolicy = new(HashShard)
   252  
   253  func getBytesFromPrimaryVectorForHash(ctx context.Context, vec *vector.Vector, i int, typ types.Type) ([]byte, error) {
   254  	if vec.IsConst() {
   255  		panic("primary value vector should not be const")
   256  	}
   257  	if vec.GetNulls().Any() {
   258  		//TODO mimic to pass BVT
   259  		return nil, moerr.NewDuplicate(ctx)
   260  		//panic("primary value vector should not contain nulls")
   261  	}
   262  	if vec.Typ.IsFixedLen() {
   263  		// is slice
   264  		size := vec.Typ.TypeSize()
   265  		l := vec.Length() * size
   266  		data := unsafe.Slice((*byte)(vector.GetPtrAt(vec, 0)), l)
   267  		end := (i + 1) * size
   268  		if end > len(data) {
   269  			//TODO mimic to pass BVT
   270  			return nil, moerr.NewDuplicate(ctx)
   271  			//return nil, moerr.NewInvalidInput("vector size not match")
   272  		}
   273  		return data[i*size : (i+1)*size], nil
   274  	} else if vec.Typ.IsVarlen() {
   275  		slice := vector.GetBytesVectorValues(vec)
   276  		if i >= len(slice) {
   277  			return []byte{}, nil
   278  		}
   279  		return slice[i], nil
   280  	}
   281  	panic(fmt.Sprintf("unknown type: %v", typ))
   282  }
   283  
// Nullable is a single column value together with its null flag.
type Nullable struct {
	// IsNull reports whether the value is null.
	IsNull bool
	// Value is the element read from the vector; getNullableValueFromVector
	// stores a zero or empty placeholder of the column's Go type here when the
	// vector is a scalar null.
	Value any
}
   288  
   289  func getNullableValueFromVector(vec *vector.Vector, i int) (value Nullable) {
   290  	if vec.IsConst() {
   291  		i = 0
   292  	}
   293  	switch vec.Typ.Oid {
   294  
   295  	case types.T_bool:
   296  		if vec.IsScalarNull() {
   297  			value = Nullable{
   298  				IsNull: true,
   299  				Value:  false,
   300  			}
   301  			return
   302  		}
   303  		value = Nullable{
   304  			IsNull: vec.GetNulls().Contains(uint64(i)),
   305  			Value:  vec.Col.([]bool)[i],
   306  		}
   307  		return
   308  
   309  	case types.T_int8:
   310  		if vec.IsScalarNull() {
   311  			value = Nullable{
   312  				IsNull: true,
   313  				Value:  int8(0),
   314  			}
   315  			return
   316  		}
   317  		value = Nullable{
   318  			IsNull: vec.GetNulls().Contains(uint64(i)),
   319  			Value:  vec.Col.([]int8)[i],
   320  		}
   321  		return
   322  
   323  	case types.T_int16:
   324  		if vec.IsScalarNull() {
   325  			value = Nullable{
   326  				IsNull: true,
   327  				Value:  int16(0),
   328  			}
   329  			return
   330  		}
   331  		value = Nullable{
   332  			IsNull: vec.GetNulls().Contains(uint64(i)),
   333  			Value:  vec.Col.([]int16)[i],
   334  		}
   335  		return
   336  
   337  	case types.T_int32:
   338  		if vec.IsScalarNull() {
   339  			value = Nullable{
   340  				IsNull: true,
   341  				Value:  int32(0),
   342  			}
   343  			return
   344  		}
   345  		value = Nullable{
   346  			IsNull: vec.GetNulls().Contains(uint64(i)),
   347  			Value:  vec.Col.([]int32)[i],
   348  		}
   349  		return
   350  
   351  	case types.T_int64:
   352  		if vec.IsScalarNull() {
   353  			value = Nullable{
   354  				IsNull: true,
   355  				Value:  int64(0),
   356  			}
   357  			return
   358  		}
   359  		value = Nullable{
   360  			IsNull: vec.GetNulls().Contains(uint64(i)),
   361  			Value:  vec.Col.([]int64)[i],
   362  		}
   363  		return
   364  
   365  	case types.T_uint8:
   366  		if vec.IsScalarNull() {
   367  			value = Nullable{
   368  				IsNull: true,
   369  				Value:  uint8(0),
   370  			}
   371  			return
   372  		}
   373  		value = Nullable{
   374  			IsNull: vec.GetNulls().Contains(uint64(i)),
   375  			Value:  vec.Col.([]uint8)[i],
   376  		}
   377  		return
   378  
   379  	case types.T_uint16:
   380  		if vec.IsScalarNull() {
   381  			value = Nullable{
   382  				IsNull: true,
   383  				Value:  uint16(0),
   384  			}
   385  			return
   386  		}
   387  		value = Nullable{
   388  			IsNull: vec.GetNulls().Contains(uint64(i)),
   389  			Value:  vec.Col.([]uint16)[i],
   390  		}
   391  		return
   392  
   393  	case types.T_uint32:
   394  		if vec.IsScalarNull() {
   395  			value = Nullable{
   396  				IsNull: true,
   397  				Value:  uint32(0),
   398  			}
   399  			return
   400  		}
   401  		value = Nullable{
   402  			IsNull: vec.GetNulls().Contains(uint64(i)),
   403  			Value:  vec.Col.([]uint32)[i],
   404  		}
   405  		return
   406  
   407  	case types.T_uint64:
   408  		if vec.IsScalarNull() {
   409  			value = Nullable{
   410  				IsNull: true,
   411  				Value:  uint64(0),
   412  			}
   413  			return
   414  		}
   415  		value = Nullable{
   416  			IsNull: vec.GetNulls().Contains(uint64(i)),
   417  			Value:  vec.Col.([]uint64)[i],
   418  		}
   419  		return
   420  
   421  	case types.T_float32:
   422  		if vec.IsScalarNull() {
   423  			value = Nullable{
   424  				IsNull: true,
   425  				Value:  float32(0),
   426  			}
   427  			return
   428  		}
   429  		value = Nullable{
   430  			IsNull: vec.GetNulls().Contains(uint64(i)),
   431  			Value:  vec.Col.([]float32)[i],
   432  		}
   433  		return
   434  
   435  	case types.T_float64:
   436  		if vec.IsScalarNull() {
   437  			value = Nullable{
   438  				IsNull: true,
   439  				Value:  float64(0),
   440  			}
   441  			return
   442  		}
   443  		value = Nullable{
   444  			IsNull: vec.GetNulls().Contains(uint64(i)),
   445  			Value:  vec.Col.([]float64)[i],
   446  		}
   447  		return
   448  
   449  	case types.T_tuple:
   450  		if vec.IsScalarNull() {
   451  			value = Nullable{
   452  				IsNull: true,
   453  				Value:  []any{},
   454  			}
   455  			return
   456  		}
   457  		value = Nullable{
   458  			IsNull: vec.GetNulls().Contains(uint64(i)),
   459  			Value:  vec.Col.([][]any)[i],
   460  		}
   461  		return
   462  
   463  	case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text:
   464  		if vec.IsScalarNull() {
   465  			value = Nullable{
   466  				IsNull: true,
   467  				Value:  []byte{},
   468  			}
   469  			return
   470  		}
   471  		value = Nullable{
   472  			IsNull: vec.GetNulls().Contains(uint64(i)),
   473  			Value:  vec.GetBytes(int64(i)),
   474  		}
   475  		return
   476  
   477  	case types.T_date:
   478  		if vec.IsScalarNull() {
   479  			var zero types.Date
   480  			value = Nullable{
   481  				IsNull: true,
   482  				Value:  zero,
   483  			}
   484  			return
   485  		}
   486  		value = Nullable{
   487  			IsNull: vec.GetNulls().Contains(uint64(i)),
   488  			Value:  vec.Col.([]types.Date)[i],
   489  		}
   490  		return
   491  
   492  	case types.T_time:
   493  		if vec.IsScalarNull() {
   494  			var zero types.Time
   495  			value = Nullable{
   496  				IsNull: true,
   497  				Value:  zero,
   498  			}
   499  			return
   500  		}
   501  		value = Nullable{
   502  			IsNull: vec.GetNulls().Contains(uint64(i)),
   503  			Value:  vec.Col.([]types.Time)[i],
   504  		}
   505  		return
   506  
   507  	case types.T_datetime:
   508  		if vec.IsScalarNull() {
   509  			var zero types.Datetime
   510  			value = Nullable{
   511  				IsNull: true,
   512  				Value:  zero,
   513  			}
   514  			return
   515  		}
   516  		value = Nullable{
   517  			IsNull: vec.GetNulls().Contains(uint64(i)),
   518  			Value:  vec.Col.([]types.Datetime)[i],
   519  		}
   520  		return
   521  
   522  	case types.T_timestamp:
   523  		if vec.IsScalarNull() {
   524  			var zero types.Timestamp
   525  			value = Nullable{
   526  				IsNull: true,
   527  				Value:  zero,
   528  			}
   529  			return
   530  		}
   531  		value = Nullable{
   532  			IsNull: vec.GetNulls().Contains(uint64(i)),
   533  			Value:  vec.Col.([]types.Timestamp)[i],
   534  		}
   535  		return
   536  
   537  	case types.T_decimal64:
   538  		if vec.IsScalarNull() {
   539  			var zero types.Decimal64
   540  			value = Nullable{
   541  				IsNull: true,
   542  				Value:  zero,
   543  			}
   544  			return
   545  		}
   546  		value = Nullable{
   547  			IsNull: vec.GetNulls().Contains(uint64(i)),
   548  			Value:  vec.Col.([]types.Decimal64)[i],
   549  		}
   550  		return
   551  
   552  	case types.T_decimal128:
   553  		if vec.IsScalarNull() {
   554  			var zero types.Decimal128
   555  			value = Nullable{
   556  				IsNull: true,
   557  				Value:  zero,
   558  			}
   559  			return
   560  		}
   561  		value = Nullable{
   562  			IsNull: vec.GetNulls().Contains(uint64(i)),
   563  			Value:  vec.Col.([]types.Decimal128)[i],
   564  		}
   565  		return
   566  
   567  	case types.T_Rowid:
   568  		if vec.IsScalarNull() {
   569  			var zero types.Rowid
   570  			value = Nullable{
   571  				IsNull: true,
   572  				Value:  zero,
   573  			}
   574  			return
   575  		}
   576  		value = Nullable{
   577  			IsNull: vec.GetNulls().Contains(uint64(i)),
   578  			Value:  vec.Col.([]types.Rowid)[i],
   579  		}
   580  		return
   581  
   582  	case types.T_uuid:
   583  		if vec.IsScalarNull() {
   584  			var zero types.Uuid
   585  			value = Nullable{
   586  				IsNull: true,
   587  				Value:  zero,
   588  			}
   589  			return
   590  		}
   591  		value = Nullable{
   592  			IsNull: vec.GetNulls().Contains(uint64(i)),
   593  			Value:  vec.Col.([]types.Uuid)[i],
   594  		}
   595  		return
   596  
   597  	}
   598  
   599  	panic(fmt.Sprintf("unknown column type: %v", vec.Typ))
   600  }
   601  
   602  func appendNullableValueToVector(vec *vector.Vector, value Nullable, mp *mpool.MPool) {
   603  	str, ok := value.Value.(string)
   604  	if ok {
   605  		value.Value = []byte(str)
   606  	}
   607  	vec.Append(value.Value, false, mp)
   608  	if value.IsNull {
   609  		vec.GetNulls().Set(uint64(vec.Length() - 1))
   610  	}
   611  }