vitess.io/vitess@v0.16.2/go/vt/topo/server.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Package topo is the module responsible for interacting with the topology
    19  service. It uses one Conn connection to the global topo service (with
    20  possibly another one to a read-only version of the global topo service),
    21  and one to each cell topo service.
    22  
    23  It contains the plug-in interfaces Conn, Factory and Version that topo
    24  implementations will use. We support Zookeeper, etcd, consul as real
    25  topo servers, and in-memory, tee as test and utility topo servers.
    26  Implementations are in sub-directories here.
    27  
    28  In tests, we do not mock this package. Instead, we just use a memorytopo.
    29  
    30  We also support copying data across topo servers (using helpers/copy.go
    31  and the topo2topo cmd binary), and writing to two topo servers at the same
    32  time (using helpers/tee.go). This is to facilitate migrations between
    33  topo servers.
    34  
    35  There are two test sub-packages associated with this code:
    36    - test/ contains a test suite that is run against all of our implementations.
    37      It just performs a bunch of common topo server activities (create, list,
    38      delete various objects, ...). If a topo implementation passes all these
    39      tests, it most likely will work as expected in a real deployment.
    40    - topotests/ contains tests that use a memorytopo to test the code in this
    41      package.
    42  */
    43  package topo
    44  
    45  import (
    46  	"context"
    47  	"fmt"
    48  	"sync"
    49  
    50  	"github.com/spf13/pflag"
    51  
    52  	"vitess.io/vitess/go/vt/log"
    53  	"vitess.io/vitess/go/vt/proto/topodata"
    54  	"vitess.io/vitess/go/vt/servenv"
    55  	"vitess.io/vitess/go/vt/vterrors"
    56  )
    57  
    58  const (
    59  	// GlobalCell is the name of the global cell. It is special
    60  	// as it contains the global topology, and references the other cells.
    61  	GlobalCell = "global"
    62  
    63  	// GlobalReadOnlyCell is the cell name used for the optional
    64  	// read-only connection to the global topology service.
    65  	GlobalReadOnlyCell = "global-read-only"
    66  )
    67  
    68  // File names used for each type of object kept in the topology.
    69  const (
    70  	CellInfoFile          = "CellInfo"
    71  	CellsAliasFile        = "CellsAlias"
    72  	KeyspaceFile          = "Keyspace"
    73  	ShardFile             = "Shard"
    74  	VSchemaFile           = "VSchema"
    75  	ShardReplicationFile  = "ShardReplication"
    76  	TabletFile            = "Tablet"
    77  	SrvVSchemaFile        = "SrvVSchema"
    78  	SrvKeyspaceFile       = "SrvKeyspace"
    79  	RoutingRulesFile      = "RoutingRules"
    80  	ExternalClustersFile  = "ExternalClusters"
    81  	ShardRoutingRulesFile = "ShardRoutingRules"
    82  )
    83  
    84  // Path components used for each type of object kept in the topology.
    85  const (
    86  	CellsPath             = "cells"
    87  	CellsAliasesPath      = "cells_aliases"
    88  	KeyspacesPath         = "keyspaces"
    89  	ShardsPath            = "shards"
    90  	TabletsPath           = "tablets"
    91  	MetadataPath          = "metadata"
    92  	ExternalClusterVitess = "vitess"
    93  )
    94  
    95  // Factory is a factory interface to create Conn objects.
    96  // Each topo implementation provides one and registers it with RegisterFactory.
    97  type Factory interface {
    98  	// HasGlobalReadOnlyCell returns true if the global cell
    99  	// has read-only replicas of the topology data. The global topology
   100  	// is usually more expensive to read from / write to, as it is
   101  	// replicated over many cells. Some topology services provide
   102  	// a more efficient way to read the data, like Observer servers
   103  	// for Zookeeper. If this returns true, we will maintain
   104  	// two connections for the global topology: the 'global' cell
   105  	// for consistent reads and writes, and the 'global-read-only'
   106  	// cell for reads only.
   107  	HasGlobalReadOnlyCell(serverAddr, root string) bool
   108  
   109  	// Create creates a topo.Conn object for the named cell, using the given server address and root path.
   110  	Create(cell, serverAddr, root string) (Conn, error)
   111  }
   112  
   113  // Server is the main topo.Server object. We support two ways of creating one:
   114  //  1. From an implementation, server address, and root path.
   115  //     This uses a plugin mechanism, and we have implementations for
   116  //     etcd, zookeeper and consul.
   117  //  2. Specific implementations may have higher level creation methods
   118  //     (in which case they may provide a more complex Factory).
   119  //     We support memorytopo (for tests and processes that only need an
   120  //     in-memory server), and tee (a helper implementation to transition
   121  //     between one server implementation and another).
   122  type Server struct {
   123  	// globalCell is the main connection to the global topo service.
   124  	// It is created once at construction time.
   125  	globalCell Conn
   126  
   127  	// globalReadOnlyCell is the read-only connection to the global
   128  	// topo service. It is the same Conn as globalCell when the
   129  	// factory reports no global read-only cell.
   130  	globalReadOnlyCell Conn
   131  
   132  	// factory allows the creation of connections to various backends.
   133  	// It is set at construction time.
   134  	factory Factory
   135  
   136  	// mu protects cellConns.
   137  	mu sync.Mutex
   138  	// cellConns caches, by cell name, the clients configured to talk
   139  	// to the topo instances representing local topo clusters. These
   140  	// should be accessed with the ConnForCell() method, which
   141  	// will read the list of addresses for that cell from the
   142  	// global cluster and create clients as needed.
   143  	cellConns map[string]cellConn
   144  }
   145  
   146  type cellConn struct { // cellConn is one entry of Server.cellConns: a cell connection plus the CellInfo it was built from.
   147  	cellInfo *topodata.CellInfo // ServerAddress and Root are compared by ConnForCell to detect a re-configured cell
   148  	conn     Conn               // cached connection for the cell, as stored by ConnForCell
   149  }
   150  
   151  type cellsToAliasesMap struct { // cellsToAliasesMap is a mutex-guarded cell -> alias lookup table.
   152  	mu sync.Mutex // guards cellsToAliases
   153  	// cellsToAliases caches the cell->alias mappings resolved so far by GetAliasByCell
   154  	cellsToAliases map[string]string
   155  }
   156  
   157  var (
   158  	// topoImplementation is the flag for which implementation to use.
   159  	topoImplementation string
   160  
   161  	// topoGlobalServerAddress is the address of the global topology
   162  	// server.
   163  	topoGlobalServerAddress string
   164  
   165  	// topoGlobalRoot is the root path to use for the global topology
   166  	// server.
   167  	topoGlobalRoot string
   168  
   169  	// factories maps an implementation name to the Factory registered for it via RegisterFactory.
   170  	factories = make(map[string]Factory)
   171  
   172  	cellsAliases = cellsToAliasesMap{ // package-level cell -> alias cache used by GetAliasByCell
   173  		cellsToAliases: make(map[string]string),
   174  	}
   175  
   176  	FlagBinaries = []string{"vttablet", "vtctl", "vtctld", "vtcombo", "vtgate",
   177  		"vtgr", "vtorc", "vtbackup"} // FlagBinaries lists the binaries for which init registers the topo flags.
   178  )
   179  
   180  func init() {
   181  	for _, cmd := range FlagBinaries {
   182  		servenv.OnParseFor(cmd, registerTopoFlags)
   183  	}
   184  }
   185  
   186  func registerTopoFlags(fs *pflag.FlagSet) {
   187  	fs.StringVar(&topoImplementation, "topo_implementation", topoImplementation, "the topology implementation to use")
   188  	fs.StringVar(&topoGlobalServerAddress, "topo_global_server_address", topoGlobalServerAddress, "the address of the global topology server")
   189  	fs.StringVar(&topoGlobalRoot, "topo_global_root", topoGlobalRoot, "the path of the global topology data in the global topology server")
   190  }
   191  
   192  // RegisterFactory registers a Factory for an implementation for a Server.
   193  // If an implementation with that name already exists, it log.Fatals out.
   194  // Call this in the 'init' function in your topology implementation module.
   195  func RegisterFactory(name string, factory Factory) {
   196  	if factories[name] != nil {
   197  		log.Fatalf("Duplicate topo.Factory registration for %v", name)
   198  	}
   199  	factories[name] = factory
   200  }
   201  
   202  // NewWithFactory creates a new Server based on the given Factory.
   203  // It also opens the global cell connection.
   204  func NewWithFactory(factory Factory, serverAddress, root string) (*Server, error) {
   205  	conn, err := factory.Create(GlobalCell, serverAddress, root)
   206  	if err != nil {
   207  		return nil, err
   208  	}
   209  	conn = NewStatsConn(GlobalCell, conn)
   210  
   211  	var connReadOnly Conn
   212  	if factory.HasGlobalReadOnlyCell(serverAddress, root) {
   213  		connReadOnly, err = factory.Create(GlobalReadOnlyCell, serverAddress, root)
   214  		if err != nil {
   215  			return nil, err
   216  		}
   217  		connReadOnly = NewStatsConn(GlobalReadOnlyCell, connReadOnly)
   218  	} else {
   219  		connReadOnly = conn
   220  	}
   221  
   222  	return &Server{
   223  		globalCell:         conn,
   224  		globalReadOnlyCell: connReadOnly,
   225  		factory:            factory,
   226  		cellConns:          make(map[string]cellConn),
   227  	}, nil
   228  }
   229  
   230  // OpenServer returns a Server using the provided implementation,
   231  // address and root for the global server.
   232  func OpenServer(implementation, serverAddress, root string) (*Server, error) {
   233  	factory, ok := factories[implementation]
   234  	if !ok {
   235  		return nil, NewError(NoImplementation, implementation)
   236  	}
   237  	return NewWithFactory(factory, serverAddress, root)
   238  }
   239  
   240  // Open returns a Server using the command line parameter flags
   241  // for implementation, address and root. It log.Exits out if an error occurs.
   242  func Open() *Server {
   243  	if topoGlobalServerAddress == "" && topoImplementation != "k8s" {
   244  		log.Exitf("topo_global_server_address must be configured")
   245  	}
   246  	if topoGlobalRoot == "" {
   247  		log.Exit("topo_global_root must be non-empty")
   248  	}
   249  	ts, err := OpenServer(topoImplementation, topoGlobalServerAddress, topoGlobalRoot)
   250  	if err != nil {
   251  		log.Exitf("Failed to open topo server (%v,%v,%v): %v", topoImplementation, topoGlobalServerAddress, topoGlobalRoot, err)
   252  	}
   253  	return ts
   254  }
   255  
   256  // ConnForCell returns a Conn object for the given cell.
   257  // It caches Conn objects from previously requested cells.
   258  func (ts *Server) ConnForCell(ctx context.Context, cell string) (Conn, error) {
   259  	// Global cell is the easy case.
   260  	if cell == GlobalCell {
   261  		if ctx.Err() != nil {
   262  			return nil, ctx.Err()
   263  		}
   264  		return ts.globalCell, nil
   265  	}
   266  
   267  	// Fetch cell cluster addresses from the global cluster.
   268  	// We can use the GlobalReadOnlyCell for this call.
   269  	ci, err := ts.GetCellInfo(ctx, cell, false /*strongRead*/)
   270  	if err != nil {
   271  		return nil, err
   272  	}
   273  
   274  	// Return a cached client if present.
   275  	ts.mu.Lock()
   276  	defer ts.mu.Unlock()
   277  	cc, ok := ts.cellConns[cell]
   278  	if ok {
   279  		// Client exists in cache.
   280  		// Let's verify that it is the same cell as we are looking for.
   281  		// The cell name can be re-used with a different ServerAddress and/or Root
   282  		// in which case we should get a new connection and update the cache
   283  		if ci.ServerAddress == cc.cellInfo.ServerAddress && ci.Root == cc.cellInfo.Root {
   284  			return cc.conn, nil
   285  		}
   286  		// Close the cached connection, we don't need it anymore
   287  		if cc.conn != nil {
   288  			cc.conn.Close()
   289  		}
   290  	}
   291  
   292  	// Connect to the cell topo server, while holding the lock.
   293  	// This ensures only one connection is established at any given time.
   294  	// Create the connection and cache it
   295  	conn, err := ts.factory.Create(cell, ci.ServerAddress, ci.Root)
   296  	switch {
   297  	case err == nil:
   298  		conn = NewStatsConn(cell, conn)
   299  		ts.cellConns[cell] = cellConn{ci, conn}
   300  		return conn, nil
   301  	case IsErrType(err, NoNode):
   302  		err = vterrors.Wrap(err, fmt.Sprintf("failed to create topo connection to %v, %v", ci.ServerAddress, ci.Root))
   303  		return nil, NewError(NoNode, err.Error())
   304  	default:
   305  		return nil, vterrors.Wrap(err, fmt.Sprintf("failed to create topo connection to %v, %v", ci.ServerAddress, ci.Root))
   306  	}
   307  }
   308  
   309  // GetAliasByCell returns the alias group this `cell` belongs to, if there's none, it returns the `cell` as alias.
   310  func GetAliasByCell(ctx context.Context, ts *Server, cell string) string {
   311  	cellsAliases.mu.Lock()
   312  	defer cellsAliases.mu.Unlock()
   313  	if region, ok := cellsAliases.cellsToAliases[cell]; ok {
   314  		return region
   315  	}
   316  	if ts != nil {
   317  		// lazily get the region from cell info if `aliases` are available
   318  		cellAliases, err := ts.GetCellsAliases(ctx, false)
   319  		if err != nil {
   320  			// for backward compatibility
   321  			return cell
   322  		}
   323  
   324  		for alias, cellsAlias := range cellAliases {
   325  			for _, cellAlias := range cellsAlias.Cells {
   326  				if cellAlias == cell {
   327  					cellsAliases.cellsToAliases[cell] = alias
   328  					return alias
   329  				}
   330  			}
   331  		}
   332  	}
   333  	// for backward compatibility
   334  	return cell
   335  }
   336  
   337  // Close will close all connections to underlying topo Server.
   338  // It will nil all member variables, so any further access will panic.
   339  func (ts *Server) Close() {
   340  	ts.globalCell.Close()
   341  	if ts.globalReadOnlyCell != ts.globalCell {
   342  		ts.globalReadOnlyCell.Close()
   343  	}
   344  	ts.globalCell = nil
   345  	ts.globalReadOnlyCell = nil
   346  	ts.mu.Lock()
   347  	defer ts.mu.Unlock()
   348  	for _, cc := range ts.cellConns {
   349  		cc.conn.Close()
   350  	}
   351  	ts.cellConns = make(map[string]cellConn)
   352  }
   353  
   354  func (ts *Server) clearCellAliasesCache() {
   355  	cellsAliases.mu.Lock()
   356  	defer cellsAliases.mu.Unlock()
   357  	cellsAliases.cellsToAliases = make(map[string]string)
   358  }
   359  
   360  // OpenExternalVitessClusterServer returns the topo server of the external cluster
   361  func (ts *Server) OpenExternalVitessClusterServer(ctx context.Context, clusterName string) (*Server, error) {
   362  	vc, err := ts.GetExternalVitessCluster(ctx, clusterName)
   363  	if err != nil {
   364  		return nil, err
   365  	}
   366  	if vc == nil {
   367  		return nil, fmt.Errorf("no vitess cluster found with name %s", clusterName)
   368  	}
   369  	var externalTopo *Server
   370  	externalTopo, err = OpenServer(vc.TopoConfig.TopoType, vc.TopoConfig.Server, vc.TopoConfig.Root)
   371  	if err != nil {
   372  		return nil, err
   373  	}
   374  	if externalTopo == nil {
   375  		return nil, fmt.Errorf("unable to open external topo for config %s", clusterName)
   376  	}
   377  	return externalTopo, nil
   378  }
   379  
   380  // SetReadOnly is initially ONLY implemented by StatsConn and used in ReadOnlyServer
   381  func (ts *Server) SetReadOnly(readOnly bool) error {
   382  	globalCellConn, ok := ts.globalCell.(*StatsConn)
   383  	if !ok {
   384  		return fmt.Errorf("invalid global cell connection type, expected StatsConn but found: %T", ts.globalCell)
   385  	}
   386  	globalCellConn.SetReadOnly(readOnly)
   387  
   388  	for _, cc := range ts.cellConns {
   389  		localCellConn, ok := cc.conn.(*StatsConn)
   390  		if !ok {
   391  			return fmt.Errorf("invalid local cell connection type, expected StatsConn but found: %T", cc.conn)
   392  		}
   393  		localCellConn.SetReadOnly(true)
   394  	}
   395  
   396  	return nil
   397  }
   398  
   399  // IsReadOnly is initially ONLY implemented by StatsConn and used in ReadOnlyServer
   400  func (ts *Server) IsReadOnly() (bool, error) {
   401  	globalCellConn, ok := ts.globalCell.(*StatsConn)
   402  	if !ok {
   403  		return false, fmt.Errorf("invalid global cell connection type, expected StatsConn but found: %T", ts.globalCell)
   404  	}
   405  	if !globalCellConn.IsReadOnly() {
   406  		return false, nil
   407  	}
   408  
   409  	for _, cc := range ts.cellConns {
   410  		localCellConn, ok := cc.conn.(*StatsConn)
   411  		if !ok {
   412  			return false, fmt.Errorf("invalid local cell connection type, expected StatsConn but found: %T", cc.conn)
   413  		}
   414  		if !localCellConn.IsReadOnly() {
   415  			return false, nil
   416  		}
   417  	}
   418  
   419  	return true, nil
   420  }