github.com/matrixorigin/matrixone@v1.2.0/pkg/hakeeper/checkers/dnservice/parse.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dnservice
    16  
    17  import (
    18  	"fmt"
    19  	"sort"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    22  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    23  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    24  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/util"
    25  	"github.com/matrixorigin/matrixone/pkg/hakeeper/operator"
    26  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    27  )
    28  
    29  const (
    30  	TnStoreCapacity = 32
    31  )
    32  
// ShardMapper is used to look up the log shard ID that belongs to a tn shard.
type ShardMapper interface {
	// getLogShardID returns the log shard ID for `tnShardID`, or a non-nil
	// error when the implementation cannot resolve it.
	getLogShardID(tnShardID uint64) (uint64, error)
}
    37  
// tnShardToLogShard implements interface `ShardMapper`.
// The key is a tn shard ID and the value is the log shard ID recorded for it.
type tnShardToLogShard map[uint64]uint64
    40  
    41  // parseClusterInfo parses information from `pb.ClusterInfo`
    42  func parseClusterInfo(cluster pb.ClusterInfo) tnShardToLogShard {
    43  	m := make(map[uint64]uint64)
    44  	for _, r := range cluster.TNShards {
    45  		// warning with duplicated tn shard ID
    46  		m[r.ShardID] = r.LogShardID
    47  	}
    48  	return m
    49  }
    50  
    51  // getLogShardID implements interface `ShardMapper`
    52  func (d tnShardToLogShard) getLogShardID(tnShardID uint64) (uint64, error) {
    53  	if logShardID, ok := d[tnShardID]; ok {
    54  		return logShardID, nil
    55  	}
    56  	return 0, moerr.NewInvalidStateNoCtx("shard %d not recorded", tnShardID)
    57  }
    58  
    59  // parseTnState parses cluster tn state.
    60  func parseTnState(cfg hakeeper.Config,
    61  	tnState pb.TNState, currTick uint64,
    62  ) (*util.ClusterStores, *reportedShards) {
    63  	stores := util.NewClusterStores()
    64  	shards := newReportedShards()
    65  
    66  	for storeID, storeInfo := range tnState.Stores {
    67  		expired := false
    68  		if cfg.TNStoreExpired(storeInfo.Tick, currTick) {
    69  			expired = true
    70  		}
    71  
    72  		store := util.NewStore(storeID, len(storeInfo.Shards), TnStoreCapacity)
    73  		if expired {
    74  			stores.RegisterExpired(store)
    75  		} else {
    76  			stores.RegisterWorking(store)
    77  		}
    78  
    79  		for _, shard := range storeInfo.Shards {
    80  			replica := newReplica(shard.ReplicaID, shard.ShardID, storeID)
    81  			shards.registerReplica(replica, expired)
    82  		}
    83  	}
    84  
    85  	return stores, shards
    86  }
    87  
    88  // checkReportedState generates Operators for reported state.
    89  // NB: the order of list is deterministic.
    90  func checkReportedState(rs *reportedShards, mapper ShardMapper, workingStores []*util.Store, idAlloc util.IDAllocator) []*operator.Operator {
    91  	var ops []*operator.Operator
    92  
    93  	reported := rs.listShards()
    94  	// keep order of all shards deterministic
    95  	sort.Slice(reported, func(i, j int) bool {
    96  		return reported[i] < reported[j]
    97  	})
    98  
    99  	for _, shardID := range reported {
   100  		shard, err := rs.getShard(shardID)
   101  		if err != nil {
   102  			// error should be always nil
   103  			panic(fmt.Sprintf("shard `%d` not register", shardID))
   104  		}
   105  
   106  		steps := checkShard(shard, mapper, workingStores, idAlloc)
   107  		// avoid Operator with nil steps
   108  		if len(steps) > 0 {
   109  			ops = append(ops,
   110  				operator.NewOperator("dnservice", shardID, operator.NoopEpoch, steps...),
   111  			)
   112  		}
   113  	}
   114  
   115  	runtime.ProcessLevelRuntime().Logger().Debug(fmt.Sprintf("construct %d operators for reported tn shards", len(ops)))
   116  
   117  	return ops
   118  }
   119  
   120  // checkInitiatingShards generates Operators for newly-created shards.
   121  // NB: the order of list is deterministic.
   122  func checkInitiatingShards(
   123  	rs *reportedShards, mapper ShardMapper, workingStores []*util.Store, idAlloc util.IDAllocator,
   124  	cluster pb.ClusterInfo, cfg hakeeper.Config, currTick uint64) []*operator.Operator {
   125  	// update the registered newly-created shards
   126  	for _, record := range cluster.TNShards {
   127  		shardID := record.ShardID
   128  		_, err := rs.getShard(shardID)
   129  		if err != nil {
   130  			if moerr.IsMoErrCode(err, moerr.ErrShardNotReported) {
   131  				// if a shard not reported, register it,
   132  				// and launch its replica after a while.
   133  				waitingShards.register(shardID, currTick)
   134  			}
   135  			continue
   136  		}
   137  		// shard reported via heartbeat, no need to wait
   138  		waitingShards.remove(shardID)
   139  	}
   140  
   141  	// list newly-created shards which had been waiting for a while
   142  	expired := waitingShards.listEligibleShards(func(start uint64) bool {
   143  		return cfg.TNStoreExpired(start, currTick)
   144  	})
   145  
   146  	var ops []*operator.Operator
   147  	for _, id := range expired {
   148  		steps := checkShard(newTnShard(id), mapper, workingStores, idAlloc)
   149  		if len(steps) > 0 { // avoid Operator with nil steps
   150  			ops = append(ops,
   151  				operator.NewOperator("dnservice", id, operator.NoopEpoch, steps...),
   152  			)
   153  		}
   154  	}
   155  
   156  	runtime.ProcessLevelRuntime().Logger().Debug(fmt.Sprintf("construct %d operators for initiating tn shards", len(ops)))
   157  	if bootstrapping && len(ops) != 0 {
   158  		bootstrapping = false
   159  	}
   160  
   161  	return ops
   162  }
   163  
// earliestTick holds the tick stored for a shard in `initialShards`;
// `initialShards.register` only ever replaces it with a smaller tick.
type earliestTick struct {
	tick uint64
}
   167  
// initialShards records all fresh tn shards.
type initialShards struct {
	// shards maps a tn shard ID to the earliest tick it was registered with.
	shards map[uint64]earliestTick
}
   172  
   173  func newInitialShards() *initialShards {
   174  	return &initialShards{
   175  		shards: make(map[uint64]earliestTick),
   176  	}
   177  }
   178  
   179  // register records initial shard with its oldest tick.
   180  func (w *initialShards) register(shardID, currTick uint64) bool {
   181  	if earliest, ok := w.shards[shardID]; ok {
   182  		if currTick >= earliest.tick {
   183  			return false
   184  		}
   185  	}
   186  	// newly registered or updated with older tick
   187  	w.shards[shardID] = earliestTick{tick: currTick}
   188  	return true
   189  }
   190  
   191  // remove deletes shard from the recorded fresh shards.
   192  func (w *initialShards) remove(shardID uint64) bool {
   193  	if _, ok := w.shards[shardID]; ok {
   194  		delete(w.shards, shardID)
   195  		return true
   196  	}
   197  	return false
   198  }
   199  
   200  // listEligibleShards lists all shards that `fn` returns true.
   201  // NB: the order of list isn't deterministic.
   202  func (w *initialShards) listEligibleShards(fn func(tick uint64) bool) []uint64 {
   203  	ids := make([]uint64, 0)
   204  	for id, earliest := range w.shards {
   205  		if bootstrapping || fn(earliest.tick) {
   206  			ids = append(ids, id)
   207  		}
   208  	}
   209  	return ids
   210  }
   211  
   212  // clear clears all record.
   213  func (w *initialShards) clear() {
   214  	w.shards = make(map[uint64]earliestTick)
   215  }
   216  
// reportedShards collects all reported tn shards.
type reportedShards struct {
	// shards indexes the collected tn shards by shard ID.
	shards   map[uint64]*tnShard
	// shardIDs holds each collected shard ID once, appended when the shard
	// is first seen by `registerReplica`.
	shardIDs []uint64
}
   222  
   223  func newReportedShards() *reportedShards {
   224  	return &reportedShards{
   225  		shards: make(map[uint64]*tnShard),
   226  	}
   227  }
   228  
   229  // registerReplica collects tn shard replicas by their status.
   230  func (rs *reportedShards) registerReplica(replica *tnReplica, expired bool) {
   231  	shardID := replica.shardID
   232  	if _, ok := rs.shards[shardID]; !ok {
   233  		rs.shardIDs = append(rs.shardIDs, shardID)
   234  		rs.shards[shardID] = newTnShard(shardID)
   235  	}
   236  	rs.shards[shardID].register(replica, expired)
   237  }
   238  
// listShards lists all the shard IDs.
// The internal slice itself is returned, not a copy, so reordering or
// modifying the result also changes what `rs` holds.
// NB: the returned order isn't deterministic.
func (rs *reportedShards) listShards() []uint64 {
	return rs.shardIDs
}
   244  
   245  // getShard returns tn shard by shard ID.
   246  func (rs *reportedShards) getShard(shardID uint64) (*tnShard, error) {
   247  	if shard, ok := rs.shards[shardID]; ok {
   248  		return shard, nil
   249  	}
   250  	return nil, moerr.NewShardNotReportedNoCtx("", shardID)
   251  }
   252  
// tnShard records metadata for tn shard.
type tnShard struct {
	shardID uint64
	// expired holds the replicas registered with expired == true.
	expired []*tnReplica
	// working holds the replicas registered with expired == false.
	working []*tnReplica
}
   259  
   260  func newTnShard(shardID uint64) *tnShard {
   261  	return &tnShard{
   262  		shardID: shardID,
   263  	}
   264  }
   265  
   266  // register collects tn shard replica.
   267  func (s *tnShard) register(replica *tnReplica, expired bool) {
   268  	if expired {
   269  		s.expired = append(s.expired, replica)
   270  	} else {
   271  		s.working = append(s.working, replica)
   272  	}
   273  }
   274  
// workingReplicas returns all working replicas.
// The internal slice itself is returned, not a copy.
// NB: the returned order isn't deterministic.
func (s *tnShard) workingReplicas() []*tnReplica {
	return s.working
}
   280  
// expiredReplicas returns all expired replicas.
// The internal slice itself is returned, not a copy.
// NB: the returned order isn't deterministic.
func (s *tnShard) expiredReplicas() []*tnReplica {
	return s.expired
}
   286  
// tnReplica records metadata for tn shard replica:
// its own replica ID, the ID of the shard it belongs to,
// and the ID of the store it was reported by.
type tnReplica struct {
	replicaID uint64
	shardID   uint64
	storeID   string
}
   293  
   294  func newReplica(
   295  	replicaID, shardID uint64, storeID string,
   296  ) *tnReplica {
   297  	return &tnReplica{
   298  		replicaID: replicaID,
   299  		shardID:   shardID,
   300  		storeID:   storeID,
   301  	}
   302  }