vitess.io/vitess@v0.16.2/go/vt/vtorc/inst/tablet_dao.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package inst
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  
    23  	"vitess.io/vitess/go/vt/external/golib/sqlutils"
    24  	"vitess.io/vitess/go/vt/log"
    25  
    26  	"google.golang.org/protobuf/encoding/prototext"
    27  
    28  	"google.golang.org/protobuf/proto"
    29  
    30  	"vitess.io/vitess/go/vt/logutil"
    31  	replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata"
    32  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    33  	"vitess.io/vitess/go/vt/topo"
    34  	"vitess.io/vitess/go/vt/topo/topoproto"
    35  	"vitess.io/vitess/go/vt/vtorc/db"
    36  	"vitess.io/vitess/go/vt/vttablet/tmclient"
    37  )
    38  
// TopoServ is the connection to the topo server. Functions in this file use
// it to update shard records (SwitchPrimary) and to re-read tablet records
// (ChangeTabletType). It is not assigned anywhere in this file, so it must be
// set elsewhere before those functions are called.
var TopoServ *topo.Server
    41  
// ErrTabletAliasNil is the sentinel error ReadTablet returns when the tablet
// record it decoded has a nil Alias.
var ErrTabletAliasNil = errors.New("tablet alias is nil")
    44  
    45  // SwitchPrimary makes the new tablet the primary and proactively performs
    46  // the necessary propagation to the old primary. The propagation is best
    47  // effort. If it fails, the tablet's shard sync will eventually converge.
    48  // The proactive propagation allows a competing VTOrc from discovering
    49  // the successful action of a previous one, which reduces churn.
    50  func SwitchPrimary(newPrimaryKey, oldPrimaryKey InstanceKey) error {
    51  	durability, err := GetDurabilityPolicy(newPrimaryKey)
    52  	if err != nil {
    53  		return err
    54  	}
    55  	newPrimaryTablet, err := ChangeTabletType(newPrimaryKey, topodatapb.TabletType_PRIMARY, SemiSyncAckers(durability, newPrimaryKey) > 0)
    56  	if err != nil {
    57  		return err
    58  	}
    59  	// The following operations are best effort.
    60  	if newPrimaryTablet.Type != topodatapb.TabletType_PRIMARY {
    61  		log.Errorf("Unexpected: tablet type did not change to primary: %v", newPrimaryTablet.Type)
    62  		return nil
    63  	}
    64  	ctx, cancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
    65  	defer cancel()
    66  	_, err = TopoServ.UpdateShardFields(ctx, newPrimaryTablet.Keyspace, newPrimaryTablet.Shard, func(si *topo.ShardInfo) error {
    67  		if proto.Equal(si.PrimaryAlias, newPrimaryTablet.Alias) && proto.Equal(si.PrimaryTermStartTime, newPrimaryTablet.PrimaryTermStartTime) {
    68  			return topo.NewError(topo.NoUpdateNeeded, "")
    69  		}
    70  
    71  		// We just successfully reparented. We should check timestamps, but always overwrite.
    72  		lastTerm := si.GetPrimaryTermStartTime()
    73  		newTerm := logutil.ProtoToTime(newPrimaryTablet.PrimaryTermStartTime)
    74  		if !newTerm.After(lastTerm) {
    75  			log.Errorf("Possible clock skew. New primary start time is before previous one: %v vs %v", newTerm, lastTerm)
    76  		}
    77  
    78  		aliasStr := topoproto.TabletAliasString(newPrimaryTablet.Alias)
    79  		log.Infof("Updating shard record: primary_alias=%v, primary_term_start_time=%v", aliasStr, newTerm)
    80  		si.PrimaryAlias = newPrimaryTablet.Alias
    81  		si.PrimaryTermStartTime = newPrimaryTablet.PrimaryTermStartTime
    82  		return nil
    83  	})
    84  	// Don't proceed if shard record could not be updated.
    85  	if err != nil {
    86  		log.Error(err)
    87  		return nil
    88  	}
    89  	if _, err := ChangeTabletType(oldPrimaryKey, topodatapb.TabletType_REPLICA, IsReplicaSemiSync(durability, newPrimaryKey, oldPrimaryKey)); err != nil {
    90  		// This is best effort.
    91  		log.Error(err)
    92  	}
    93  	return nil
    94  }
    95  
    96  // ChangeTabletType designates the tablet that owns an instance as the primary.
    97  func ChangeTabletType(instanceKey InstanceKey, tabletType topodatapb.TabletType, semiSync bool) (*topodatapb.Tablet, error) {
    98  	if instanceKey.Hostname == "" {
    99  		return nil, errors.New("can't set tablet to primary: instance is unspecified")
   100  	}
   101  	tablet, err := ReadTablet(instanceKey)
   102  	if err != nil {
   103  		return nil, err
   104  	}
   105  	tmc := tmclient.NewTabletManagerClient()
   106  	tmcCtx, tmcCancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
   107  	defer tmcCancel()
   108  	if err := tmc.ChangeType(tmcCtx, tablet, tabletType, semiSync); err != nil {
   109  		return nil, err
   110  	}
   111  	tsCtx, tsCancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
   112  	defer tsCancel()
   113  	ti, err := TopoServ.GetTablet(tsCtx, tablet.Alias)
   114  	if err != nil {
   115  		log.Error(err)
   116  		return nil, err
   117  	}
   118  	if err := SaveTablet(ti.Tablet); err != nil {
   119  		log.Error(err)
   120  	}
   121  	return ti.Tablet, nil
   122  }
   123  
   124  // ResetReplicationParameters resets the replication parameters on the given tablet.
   125  func ResetReplicationParameters(instanceKey InstanceKey) error {
   126  	tablet, err := ReadTablet(instanceKey)
   127  	if err != nil {
   128  		return err
   129  	}
   130  	tmc := tmclient.NewTabletManagerClient()
   131  	tmcCtx, tmcCancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
   132  	defer tmcCancel()
   133  	if err := tmc.ResetReplicationParameters(tmcCtx, tablet); err != nil {
   134  		return err
   135  	}
   136  	return nil
   137  }
   138  
   139  // FullStatus gets the full status of the MySQL running in vttablet.
   140  func FullStatus(instanceKey InstanceKey) (*replicationdatapb.FullStatus, error) {
   141  	tablet, err := ReadTablet(instanceKey)
   142  	if err != nil {
   143  		return nil, err
   144  	}
   145  	tmc := tmclient.NewTabletManagerClient()
   146  	tmcCtx, tmcCancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout)
   147  	defer tmcCancel()
   148  	return tmc.FullStatus(tmcCtx, tablet)
   149  }
   150  
   151  // ReadTablet reads the vitess tablet record.
   152  func ReadTablet(instanceKey InstanceKey) (*topodatapb.Tablet, error) {
   153  	query := `
   154  		select
   155  			info
   156  		from
   157  			vitess_tablet
   158  		where hostname=? and port=?
   159  		`
   160  	args := sqlutils.Args(instanceKey.Hostname, instanceKey.Port)
   161  	tablet := &topodatapb.Tablet{}
   162  	err := db.QueryVTOrc(query, args, func(row sqlutils.RowMap) error {
   163  		return prototext.Unmarshal([]byte(row.GetString("info")), tablet)
   164  	})
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  	if tablet.Alias == nil {
   169  		return nil, ErrTabletAliasNil
   170  	}
   171  	return tablet, nil
   172  }
   173  
   174  // SaveTablet saves the tablet record against the instanceKey.
   175  func SaveTablet(tablet *topodatapb.Tablet) error {
   176  	tabletp, err := prototext.Marshal(tablet)
   177  	if err != nil {
   178  		return err
   179  	}
   180  	_, err = db.ExecVTOrc(`
   181  		replace
   182  			into vitess_tablet (
   183  				alias, hostname, port, cell, keyspace, shard, tablet_type, primary_timestamp, info
   184  			) values (
   185  				?, ?, ?, ?, ?, ?, ?, ?, ?
   186  			)
   187  		`,
   188  		topoproto.TabletAliasString(tablet.Alias),
   189  		tablet.MysqlHostname,
   190  		int(tablet.MysqlPort),
   191  		tablet.Alias.Cell,
   192  		tablet.Keyspace,
   193  		tablet.Shard,
   194  		int(tablet.Type),
   195  		logutil.ProtoToTime(tablet.PrimaryTermStartTime),
   196  		tabletp,
   197  	)
   198  	return err
   199  }