github.com/matrixorigin/matrixone@v1.2.0/pkg/bootstrap/service_upgrade_tenant.go (about)

     1  // Copyright 2024 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bootstrap
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"time"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/bootstrap/versions"
    23  	"github.com/matrixorigin/matrixone/pkg/catalog"
    24  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    25  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    26  	"github.com/matrixorigin/matrixone/pkg/txn/client"
    27  	"github.com/matrixorigin/matrixone/pkg/util/executor"
    28  	"go.uber.org/zap"
    29  )
    30  
    31  // MaybeUpgradeTenant used to check the tenant need upgrade or not. If need upgrade, it will
    32  // upgrade the tenant immediately in current txn.
    33  func (s *service) MaybeUpgradeTenant(
    34  	ctx context.Context,
    35  	tenantFetchFunc func() (int32, string, error),
    36  	txnOp client.TxnOperator) (bool, error) {
    37  	tenantID, version, err := tenantFetchFunc()
    38  	if err != nil {
    39  		return false, err
    40  	}
    41  
    42  	s.mu.RLock()
    43  	checked := s.mu.tenants[tenantID]
    44  	s.mu.RUnlock()
    45  	if checked {
    46  		return false, nil
    47  	}
    48  
    49  	upgraded := false
    50  	opts := executor.Options{}.WithTxn(txnOp)
    51  	err = s.exec.ExecTxn(
    52  		ctx,
    53  		func(txn executor.TxnExecutor) error {
    54  			txn.Use(catalog.MO_CATALOG)
    55  			// tenant create at current cn, can work correctly
    56  			currentCN := s.getFinalVersionHandle().Metadata()
    57  			if currentCN.Version == version {
    58  				return nil
    59  			} else if versions.Compare(currentCN.Version, version) < 0 {
    60  				// tenant create at 1.4.0, current tenant version 1.5.0, it must be cannot work
    61  				return moerr.NewInvalidInputNoCtx("tenant version %s is greater than current cn version %s",
    62  					version, currentCN.Version)
    63  			}
    64  
    65  			// arrive here means tenant version < current cn version, need upgrade.
    66  			// and currentCN.Version == last cluster version
    67  
    68  			latestVersion, err := versions.GetLatestVersion(txn)
    69  			if err != nil {
    70  				return err
    71  			}
    72  			if latestVersion.Version != currentCN.Version {
    73  				panic("BUG: current cn's version(" +
    74  					currentCN.Version +
    75  					") must equal cluster latest version(" +
    76  					latestVersion.Version +
    77  					")")
    78  			}
    79  
    80  			upgraded = true
    81  			for {
    82  				// upgrade completed
    83  				if s.upgrade.finalVersionCompleted.Load() {
    84  					break
    85  				}
    86  
    87  				upgrades, err := versions.GetUpgradeVersions(latestVersion.Version, latestVersion.VersionOffset, txn, false, true)
    88  				if err != nil {
    89  					return err
    90  				}
    91  				// latest cluster is already upgrade completed
    92  				if upgrades[len(upgrades)-1].State == versions.StateUpgradingTenant ||
    93  					upgrades[len(upgrades)-1].State == versions.StateReady {
    94  					break
    95  				}
    96  
    97  				time.Sleep(time.Second)
    98  			}
    99  
   100  			// upgrade in current goroutine immediately
   101  			version, err = versions.GetTenantCreateVersionForUpdate(tenantID, txn)
   102  			if err != nil {
   103  				return err
   104  			}
   105  			from := version
   106  			for _, v := range s.handles {
   107  				if versions.Compare(v.Metadata().Version, from) > 0 &&
   108  					v.Metadata().CanDirectUpgrade(from) {
   109  					if err := v.HandleTenantUpgrade(ctx, tenantID, txn); err != nil {
   110  						return err
   111  					}
   112  					if err := versions.UpgradeTenantVersion(tenantID, v.Metadata().Version, txn); err != nil {
   113  						return err
   114  					}
   115  					from = v.Metadata().Version
   116  				}
   117  			}
   118  			return nil
   119  		},
   120  		opts)
   121  	if err != nil {
   122  		return false, err
   123  	}
   124  	s.mu.Lock()
   125  	s.mu.tenants[tenantID] = true
   126  	s.mu.Unlock()
   127  	return upgraded, nil
   128  }
   129  
   130  // asyncUpgradeTenantTask is a task to execute the tenant upgrade logic in
   131  // parallel based on the grouped tenant batch.
   132  func (s *service) asyncUpgradeTenantTask(ctx context.Context) {
   133  	fn := func() (bool, error) {
   134  		ctx, cancel := context.WithTimeout(ctx, time.Hour*24)
   135  		defer cancel()
   136  
   137  		hasUpgradeTenants := false
   138  		opts := executor.Options{}.
   139  			WithDatabase(catalog.MO_CATALOG).
   140  			WithMinCommittedTS(s.now()).
   141  			WithWaitCommittedLogApplied().
   142  			WithTimeZone(time.Local)
   143  		err := s.exec.ExecTxn(
   144  			ctx,
   145  			func(txn executor.TxnExecutor) error {
   146  				upgrade, ok, err := versions.GetUpgradingTenantVersion(txn)
   147  				if err != nil {
   148  					getUpgradeLogger().Error("failed to get upgrading tenant version",
   149  						zap.Error(err))
   150  					return err
   151  				}
   152  
   153  				getUpgradeLogger().Info("get upgrading tenant version",
   154  					zap.String("upgrade", upgrade.String()),
   155  					zap.Bool("has", ok))
   156  				if !ok || upgrade.TotalTenant == upgrade.ReadyTenant {
   157  					return nil
   158  				}
   159  
   160  				// no upgrade logic on current cn, skip
   161  				v := s.getFinalVersionHandle().Metadata().Version
   162  				if versions.Compare(upgrade.ToVersion, v) > 0 {
   163  					getUpgradeLogger().Info("skip upgrade tenant",
   164  						zap.String("final", v),
   165  						zap.String("to", upgrade.ToVersion))
   166  					return nil
   167  				}
   168  
   169  				// select task and tenants for update
   170  				taskID, tenants, createVersions, err := versions.GetUpgradeTenantTasks(upgrade.ID, txn)
   171  				if err != nil {
   172  					getUpgradeLogger().Error("failed to load upgrade tenants",
   173  						zap.String("upgrade", upgrade.String()),
   174  						zap.Error(err))
   175  					return err
   176  				}
   177  
   178  				getUpgradeLogger().Info("load upgrade tenants",
   179  					zap.Int("count", len(tenants)),
   180  					zap.String("upgrade", upgrade.String()))
   181  				if len(tenants) == 0 {
   182  					return nil
   183  				}
   184  
   185  				hasUpgradeTenants = true
   186  				h := s.getVersionHandle(upgrade.ToVersion)
   187  				updated := int32(0)
   188  				for i, id := range tenants {
   189  					createVersion := createVersions[i]
   190  
   191  					getUpgradeLogger().Info("upgrade tenant",
   192  						zap.Int32("tenant", id),
   193  						zap.String("tenant-version", createVersion),
   194  						zap.String("upgrade", upgrade.String()))
   195  
   196  					// createVersion >= upgrade.ToVersion already upgrade
   197  					if versions.Compare(createVersion, upgrade.ToVersion) > 0 {
   198  						continue
   199  					}
   200  
   201  					getUpgradeLogger().Info("execute upgrade tenant",
   202  						zap.Int32("tenant", id),
   203  						zap.String("tenant-version", createVersion),
   204  						zap.String("upgrade", upgrade.String()))
   205  
   206  					if err := h.HandleTenantUpgrade(ctx, id, txn); err != nil {
   207  						getUpgradeLogger().Error("failed to execute upgrade tenant",
   208  							zap.Int32("tenant", id),
   209  							zap.String("tenant-version", createVersion),
   210  							zap.String("upgrade", upgrade.String()),
   211  							zap.Error(err))
   212  						return err
   213  					}
   214  
   215  					if err := versions.UpgradeTenantVersion(id, h.Metadata().Version, txn); err != nil {
   216  						getUpgradeLogger().Error("failed to update upgrade tenant create version",
   217  							zap.Int32("tenant", id),
   218  							zap.String("upgrade", upgrade.String()),
   219  							zap.Error(err))
   220  						return err
   221  					}
   222  
   223  					getUpgradeLogger().Info("execute upgrade tenant completed",
   224  						zap.Int32("tenant", id),
   225  						zap.String("tenant-version", createVersion),
   226  						zap.String("upgrade", upgrade.String()))
   227  					updated++
   228  				}
   229  
   230  				if err := versions.UpdateUpgradeTenantTaskState(taskID, versions.Yes, txn); err != nil {
   231  					getUpgradeLogger().Error("failed to update upgrade tenant state",
   232  						zap.String("upgrade", upgrade.String()))
   233  					return err
   234  				}
   235  				getUpgradeLogger().Info("tenant state updated",
   236  					zap.Int32("from", tenants[0]),
   237  					zap.Int32("to", tenants[len(tenants)-1]),
   238  					zap.String("upgrade", upgrade.String()))
   239  
   240  				// update count, we need using select for update to avoid concurrent update
   241  				upgrade, err = versions.GetUpgradeVersionForUpdateByID(upgrade.ID, txn)
   242  				if err != nil {
   243  					getUpgradeLogger().Error("failed to get latest upgrade info",
   244  						zap.String("upgrade", upgrade.String()))
   245  					return err
   246  				}
   247  
   248  				upgrade.ReadyTenant += updated
   249  				if upgrade.TotalTenant < upgrade.ReadyTenant {
   250  					panic(fmt.Sprintf("BUG: invalid upgrade tenant, upgrade %s, updated %d", upgrade.String(), updated))
   251  				}
   252  
   253  				getUpgradeLogger().Info("upgrade tenant ready count changed",
   254  					zap.String("upgrade", upgrade.String()))
   255  
   256  				if upgrade.State == versions.StateReady {
   257  					return nil
   258  				}
   259  				return versions.UpdateVersionUpgradeTasks(upgrade, txn)
   260  			},
   261  			opts)
   262  		if err != nil {
   263  			getUpgradeLogger().Error("tenant task handle failed",
   264  				zap.Error(err))
   265  			return false, err
   266  		}
   267  		return hasUpgradeTenants, nil
   268  	}
   269  
   270  	timer := time.NewTimer(s.upgrade.checkUpgradeTenantDuration)
   271  	defer timer.Stop()
   272  
   273  	for {
   274  		select {
   275  		case <-ctx.Done():
   276  			return
   277  		case <-timer.C:
   278  			if s.upgrade.finalVersionCompleted.Load() {
   279  				return
   280  			}
   281  
   282  			for {
   283  				if hasUpgradeTenants, err := fn(); err != nil || hasUpgradeTenants {
   284  					continue
   285  				}
   286  				break
   287  			}
   288  			timer.Reset(s.upgrade.checkUpgradeTenantDuration)
   289  		}
   290  	}
   291  }
   292  
   293  func fetchTenants(
   294  	batch int,
   295  	fn func([]int32) error,
   296  	txn executor.TxnExecutor) error {
   297  	last := int32(-1)
   298  	var ids []int32
   299  	for {
   300  		ids = ids[:0]
   301  		sql := fmt.Sprintf("select account_id from mo_account where account_id > %d order by account_id limit %d",
   302  			last,
   303  			batch)
   304  		res, err := txn.Exec(sql, executor.StatementOption{})
   305  		if err != nil {
   306  			return err
   307  		}
   308  		n := 0
   309  		res.ReadRows(func(rows int, cols []*vector.Vector) bool {
   310  			for i := 0; i < rows; i++ {
   311  				last = vector.GetFixedAt[int32](cols[0], i)
   312  				ids = append(ids, last)
   313  				n++
   314  			}
   315  			return true
   316  		})
   317  		res.Close()
   318  		if n == 0 {
   319  			return nil
   320  		}
   321  		if err := fn(ids); err != nil {
   322  			return err
   323  		}
   324  	}
   325  }