github.com/weaviate/weaviate@v1.24.6/usecases/schema/startup_cluster_sync.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package schema
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"strings"
    18  
    19  	"github.com/sirupsen/logrus"
    20  	"github.com/weaviate/weaviate/entities/models"
    21  )
    22  
    23  // startupClusterSync tries to determine what - if any - schema migration is
    24  // required at startup. If a node is the first in a cluster the assumption is
    25  // that its state is the truth.
    26  //
    27  // For the n-th node (where n>1) there is a potential for conflict if the
    28  // schemas aren't in sync:
    29  //
    30  // - If Node 1 has a non-nil schema, but Node 2 has a nil-schema, then we can
    31  // consider Node 2 to be a new node that is just joining the cluster. In this
    32  // case, we can copy the state from the existing nodes (if they agree on a
    33  // schema)
    34  //
    35  // - If Node 1 and Node 2 have an identical schema, then we can assume that the
    36  // startup was just an ordinary (re)start of the node. No action is required.
    37  //
    38  // - If Node 1 and Node 2 both have a schema, but they aren't in sync, the
    39  // cluster is broken. This state cannot be automatically recovered from and
    40  // startup needs to fail. Manual intervention would be required in this case.
    41  func (m *Manager) startupClusterSync(ctx context.Context) error {
    42  	nodes := m.clusterState.AllNames()
    43  	if len(nodes) <= 1 {
    44  		return m.startupHandleSingleNode(ctx, nodes)
    45  	}
    46  
    47  	if m.schemaCache.isEmpty() {
    48  		return m.startupJoinCluster(ctx)
    49  	}
    50  
    51  	err := m.validateSchemaCorruption(ctx)
    52  	if err == nil {
    53  		// schema is fine, we are done
    54  		return nil
    55  	}
    56  
    57  	if m.clusterState.SchemaSyncIgnored() {
    58  		m.logger.WithError(err).WithFields(logrusStartupSyncFields()).
    59  			Warning("schema out of sync, but ignored because " +
    60  				"CLUSTER_IGNORE_SCHEMA_SYNC=true")
    61  		return nil
    62  	}
    63  
    64  	if m.cluster.HaveDanglingTxs(ctx, resumableTxs) {
    65  		m.logger.WithFields(logrusStartupSyncFields()).
    66  			Infof("schema out of sync, but there are dangling transactions, the check will be repeated after an attempt to resume those transactions")
    67  
    68  		m.LockGuard(func() {
    69  			m.shouldTryToResumeTx = true
    70  		})
    71  		return nil
    72  	}
    73  
    74  	return err
    75  }
    76  
    77  // startupHandleSingleNode deals with the case where there is only a single
    78  // node in the cluster. In the vast majority of cases there is nothing to do.
    79  // An edge case would be where the cluster has size=0, or size=1 but the node's
    80  // name is not the local name's node. This would indicate a broken cluster and
    81  // can't be recovered from
    82  func (m *Manager) startupHandleSingleNode(ctx context.Context,
    83  	nodes []string,
    84  ) error {
    85  	localName := m.clusterState.LocalName()
    86  	if len(nodes) == 0 {
    87  		return fmt.Errorf("corrupt cluster state: cluster has size=0")
    88  	}
    89  
    90  	if nodes[0] != localName {
    91  		return fmt.Errorf("corrupt cluster state: only node in the cluster does not "+
    92  			"match local node name: %v vs %s", nodes, localName)
    93  	}
    94  
    95  	m.logger.WithFields(logrusStartupSyncFields()).
    96  		Debug("Only node in the cluster at this point. " +
    97  			"No schema sync necessary.")
    98  
    99  	// startup is complete
   100  	return nil
   101  }
   102  
   103  // startupJoinCluster migrates the schema for a new node. The assumption is
   104  // that other nodes have schema state and we need to migrate this schema to the
   105  // local node transactionally. In other words, this startup process can not
   106  // occur concurrently with a user-initiated schema update. One of those must
   107  // fail.
   108  //
   109  // There is one edge case: The cluster could consist of multiple nodes which
   110  // are empty. In this case, no migration is required.
   111  func (m *Manager) startupJoinCluster(ctx context.Context) error {
   112  	tx, err := m.cluster.BeginTransaction(ctx, ReadSchema, nil, DefaultTxTTL)
   113  	if err != nil {
   114  		if m.clusterSyncImpossibleBecauseRemoteNodeTooOld(err) {
   115  			return nil
   116  		}
   117  		return fmt.Errorf("read schema: open transaction: %w", err)
   118  	}
   119  
   120  	// this tx is read-only, so we don't have to worry about aborting it, the
   121  	// close should be the same on both happy and unhappy path
   122  	defer m.cluster.CloseReadTransaction(ctx, tx)
   123  
   124  	pl, ok := tx.Payload.(ReadSchemaPayload)
   125  	if !ok {
   126  		return fmt.Errorf("unrecognized tx response payload: %T", tx.Payload)
   127  	}
   128  
   129  	// by the time we're here the consensus function has run, so we can be sure
   130  	// that all other nodes agree on this schema.
   131  
   132  	if isEmpty(pl.Schema) {
   133  		// already in sync, nothing to do
   134  		return nil
   135  	}
   136  
   137  	if err := m.saveSchema(ctx, *pl.Schema); err != nil {
   138  		return fmt.Errorf("save schema: %w", err)
   139  	}
   140  
   141  	m.schemaCache.setState(*pl.Schema)
   142  
   143  	return nil
   144  }
   145  
   146  func (m *Manager) ClusterStatus(ctx context.Context) (*models.SchemaClusterStatus, error) {
   147  	m.RLock()
   148  	defer m.RUnlock()
   149  
   150  	out := &models.SchemaClusterStatus{
   151  		Hostname:         m.clusterState.LocalName(),
   152  		IgnoreSchemaSync: m.clusterState.SchemaSyncIgnored(),
   153  	}
   154  
   155  	nodes := m.clusterState.AllNames()
   156  	out.NodeCount = int64(len(nodes))
   157  	if len(nodes) < 2 {
   158  		out.Healthy = true
   159  		return out, nil
   160  	}
   161  
   162  	err := m.validateSchemaCorruption(ctx)
   163  	if err != nil {
   164  		out.Error = err.Error()
   165  		out.Healthy = false
   166  		return out, err
   167  	}
   168  
   169  	out.Healthy = true
   170  	return out, nil
   171  }
   172  
   173  // validateSchemaCorruption makes sure that - given that all nodes in the
   174  // cluster have a schema - they are in sync. If not the cluster is considered
   175  // broken and needs to be repaired manually
   176  func (m *Manager) validateSchemaCorruption(ctx context.Context) error {
   177  	tx, err := m.cluster.BeginTransaction(ctx, ReadSchema, nil, DefaultTxTTL)
   178  	if err != nil {
   179  		if m.clusterSyncImpossibleBecauseRemoteNodeTooOld(err) {
   180  			return nil
   181  		}
   182  		return fmt.Errorf("read schema: open transaction: %w", err)
   183  	}
   184  
   185  	// this tx is read-only, so we don't have to worry about aborting it, the
   186  	// close should be the same on both happy and unhappy path
   187  	if err = m.cluster.CloseReadTransaction(ctx, tx); err != nil {
   188  		return err
   189  	}
   190  
   191  	pl, ok := tx.Payload.(ReadSchemaPayload)
   192  	if !ok {
   193  		return fmt.Errorf("unrecognized tx response payload: %T", tx.Payload)
   194  	}
   195  	var diff []string
   196  	cmp := func() error {
   197  		if err := Equal(&m.schemaCache.State, pl.Schema); err != nil {
   198  			diff = Diff("local", &m.schemaCache.State, "cluster", pl.Schema)
   199  			return err
   200  		}
   201  		return nil
   202  	}
   203  	if err := m.schemaCache.RLockGuard(cmp); err != nil {
   204  		m.logger.WithFields(logrusStartupSyncFields()).WithFields(logrus.Fields{
   205  			"diff": diff,
   206  		}).Warning("mismatch between local schema and remote (other nodes consensus) schema")
   207  		if m.clusterState.SkipSchemaRepair() {
   208  			return fmt.Errorf("corrupt cluster: other nodes have consensus on schema, "+
   209  				"but local node has a different (non-null) schema: %w", err)
   210  		}
   211  		if repairErr := m.repairSchema(ctx, pl.Schema); repairErr != nil {
   212  			return fmt.Errorf("attempted to repair and failed: %v, sync error: %w", repairErr, err)
   213  		}
   214  	}
   215  
   216  	return nil
   217  }
   218  
   219  func logrusStartupSyncFields() logrus.Fields {
   220  	return logrus.Fields{"action": "startup_cluster_schema_sync"}
   221  }
   222  
   223  func isEmpty(schema *State) bool {
   224  	return schema == nil || schema.ObjectSchema == nil || len(schema.ObjectSchema.Classes) == 0
   225  }
   226  
   227  func (m *Manager) clusterSyncImpossibleBecauseRemoteNodeTooOld(err error) bool {
   228  	// string-matching on the error message isn't the cleanest way possible, but
   229  	// unfortunately there's not an easy way to find out, as this check has to
   230  	// work with whatever was already present in v1.16.x
   231  	//
   232  	// in theory we could have used the node api which also returns the versions,
   233  	// however, the node API depends on the DB which depends on the schema
   234  	// manager, so we cannot use them at schema manager startup which happens
   235  	// before db startup.
   236  	//
   237  	// Given that this workaround should only ever be required during a rolling
   238  	// update from v1.16 to v1.17, we can consider this acceptable
   239  	if strings.Contains(err.Error(), "unrecognized schema transaction type") {
   240  		m.logger.WithFields(logrusStartupSyncFields()).
   241  			Info("skipping schema cluster sync because not all nodes in the cluster " +
   242  				"support schema cluster sync yet. To enable schema cluster sync at startup " +
   243  				"make sure all nodes in the cluster run at least v1.17")
   244  		return true
   245  	}
   246  
   247  	return false
   248  }