github.com/matrixorigin/matrixone@v1.2.0/pkg/bootstrap/service_upgrade_tenant.go (about) 1 // Copyright 2024 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bootstrap 16 17 import ( 18 "context" 19 "fmt" 20 "time" 21 22 "github.com/matrixorigin/matrixone/pkg/bootstrap/versions" 23 "github.com/matrixorigin/matrixone/pkg/catalog" 24 "github.com/matrixorigin/matrixone/pkg/common/moerr" 25 "github.com/matrixorigin/matrixone/pkg/container/vector" 26 "github.com/matrixorigin/matrixone/pkg/txn/client" 27 "github.com/matrixorigin/matrixone/pkg/util/executor" 28 "go.uber.org/zap" 29 ) 30 31 // MaybeUpgradeTenant used to check the tenant need upgrade or not. If need upgrade, it will 32 // upgrade the tenant immediately in current txn. 33 func (s *service) MaybeUpgradeTenant( 34 ctx context.Context, 35 tenantFetchFunc func() (int32, string, error), 36 txnOp client.TxnOperator) (bool, error) { 37 tenantID, version, err := tenantFetchFunc() 38 if err != nil { 39 return false, err 40 } 41 42 s.mu.RLock() 43 checked := s.mu.tenants[tenantID] 44 s.mu.RUnlock() 45 if checked { 46 return false, nil 47 } 48 49 upgraded := false 50 opts := executor.Options{}.WithTxn(txnOp) 51 err = s.exec.ExecTxn( 52 ctx, 53 func(txn executor.TxnExecutor) error { 54 txn.Use(catalog.MO_CATALOG) 55 // tenant create at current cn, can work correctly 56 currentCN := s.getFinalVersionHandle().Metadata() 57 if currentCN.Version == version { 58 return nil 59 } else if versions.Compare(currentCN.Version, version) < 0 { 60 // tenant create at 1.4.0, current tenant version 1.5.0, it must be cannot work 61 return moerr.NewInvalidInputNoCtx("tenant version %s is greater than current cn version %s", 62 version, currentCN.Version) 63 } 64 65 // arrive here means tenant version < current cn version, need upgrade. 66 // and currentCN.Version == last cluster version 67 68 latestVersion, err := versions.GetLatestVersion(txn) 69 if err != nil { 70 return err 71 } 72 if latestVersion.Version != currentCN.Version { 73 panic("BUG: current cn's version(" + 74 currentCN.Version + 75 ") must equal cluster latest version(" + 76 latestVersion.Version + 77 ")") 78 } 79 80 upgraded = true 81 for { 82 // upgrade completed 83 if s.upgrade.finalVersionCompleted.Load() { 84 break 85 } 86 87 upgrades, err := versions.GetUpgradeVersions(latestVersion.Version, latestVersion.VersionOffset, txn, false, true) 88 if err != nil { 89 return err 90 } 91 // latest cluster is already upgrade completed 92 if upgrades[len(upgrades)-1].State == versions.StateUpgradingTenant || 93 upgrades[len(upgrades)-1].State == versions.StateReady { 94 break 95 } 96 97 time.Sleep(time.Second) 98 } 99 100 // upgrade in current goroutine immediately 101 version, err = versions.GetTenantCreateVersionForUpdate(tenantID, txn) 102 if err != nil { 103 return err 104 } 105 from := version 106 for _, v := range s.handles { 107 if versions.Compare(v.Metadata().Version, from) > 0 && 108 v.Metadata().CanDirectUpgrade(from) { 109 if err := v.HandleTenantUpgrade(ctx, tenantID, txn); err != nil { 110 return err 111 } 112 if err := versions.UpgradeTenantVersion(tenantID, v.Metadata().Version, txn); err != nil { 113 return err 114 } 115 from = v.Metadata().Version 116 } 117 } 118 return nil 119 }, 120 opts) 121 if err != nil { 122 return false, err 123 } 124 s.mu.Lock() 125 s.mu.tenants[tenantID] = true 126 s.mu.Unlock() 127 return upgraded, nil 128 } 129 130 // asyncUpgradeTenantTask is a task to execute the tenant upgrade logic in 131 // parallel based on the grouped tenant batch. 132 func (s *service) asyncUpgradeTenantTask(ctx context.Context) { 133 fn := func() (bool, error) { 134 ctx, cancel := context.WithTimeout(ctx, time.Hour*24) 135 defer cancel() 136 137 hasUpgradeTenants := false 138 opts := executor.Options{}. 139 WithDatabase(catalog.MO_CATALOG). 140 WithMinCommittedTS(s.now()). 141 WithWaitCommittedLogApplied(). 142 WithTimeZone(time.Local) 143 err := s.exec.ExecTxn( 144 ctx, 145 func(txn executor.TxnExecutor) error { 146 upgrade, ok, err := versions.GetUpgradingTenantVersion(txn) 147 if err != nil { 148 getUpgradeLogger().Error("failed to get upgrading tenant version", 149 zap.Error(err)) 150 return err 151 } 152 153 getUpgradeLogger().Info("get upgrading tenant version", 154 zap.String("upgrade", upgrade.String()), 155 zap.Bool("has", ok)) 156 if !ok || upgrade.TotalTenant == upgrade.ReadyTenant { 157 return nil 158 } 159 160 // no upgrade logic on current cn, skip 161 v := s.getFinalVersionHandle().Metadata().Version 162 if versions.Compare(upgrade.ToVersion, v) > 0 { 163 getUpgradeLogger().Info("skip upgrade tenant", 164 zap.String("final", v), 165 zap.String("to", upgrade.ToVersion)) 166 return nil 167 } 168 169 // select task and tenants for update 170 taskID, tenants, createVersions, err := versions.GetUpgradeTenantTasks(upgrade.ID, txn) 171 if err != nil { 172 getUpgradeLogger().Error("failed to load upgrade tenants", 173 zap.String("upgrade", upgrade.String()), 174 zap.Error(err)) 175 return err 176 } 177 178 getUpgradeLogger().Info("load upgrade tenants", 179 zap.Int("count", len(tenants)), 180 zap.String("upgrade", upgrade.String())) 181 if len(tenants) == 0 { 182 return nil 183 } 184 185 hasUpgradeTenants = true 186 h := s.getVersionHandle(upgrade.ToVersion) 187 updated := int32(0) 188 for i, id := range tenants { 189 createVersion := createVersions[i] 190 191 getUpgradeLogger().Info("upgrade tenant", 192 zap.Int32("tenant", id), 193 zap.String("tenant-version", createVersion), 194 zap.String("upgrade", upgrade.String())) 195 196 // createVersion >= upgrade.ToVersion already upgrade 197 if versions.Compare(createVersion, upgrade.ToVersion) > 0 { 198 continue 199 } 200 201 getUpgradeLogger().Info("execute upgrade tenant", 202 zap.Int32("tenant", id), 203 zap.String("tenant-version", createVersion), 204 zap.String("upgrade", upgrade.String())) 205 206 if err := h.HandleTenantUpgrade(ctx, id, txn); err != nil { 207 getUpgradeLogger().Error("failed to execute upgrade tenant", 208 zap.Int32("tenant", id), 209 zap.String("tenant-version", createVersion), 210 zap.String("upgrade", upgrade.String()), 211 zap.Error(err)) 212 return err 213 } 214 215 if err := versions.UpgradeTenantVersion(id, h.Metadata().Version, txn); err != nil { 216 getUpgradeLogger().Error("failed to update upgrade tenant create version", 217 zap.Int32("tenant", id), 218 zap.String("upgrade", upgrade.String()), 219 zap.Error(err)) 220 return err 221 } 222 223 getUpgradeLogger().Info("execute upgrade tenant completed", 224 zap.Int32("tenant", id), 225 zap.String("tenant-version", createVersion), 226 zap.String("upgrade", upgrade.String())) 227 updated++ 228 } 229 230 if err := versions.UpdateUpgradeTenantTaskState(taskID, versions.Yes, txn); err != nil { 231 getUpgradeLogger().Error("failed to update upgrade tenant state", 232 zap.String("upgrade", upgrade.String())) 233 return err 234 } 235 getUpgradeLogger().Info("tenant state updated", 236 zap.Int32("from", tenants[0]), 237 zap.Int32("to", tenants[len(tenants)-1]), 238 zap.String("upgrade", upgrade.String())) 239 240 // update count, we need using select for update to avoid concurrent update 241 upgrade, err = versions.GetUpgradeVersionForUpdateByID(upgrade.ID, txn) 242 if err != nil { 243 getUpgradeLogger().Error("failed to get latest upgrade info", 244 zap.String("upgrade", upgrade.String())) 245 return err 246 } 247 248 upgrade.ReadyTenant += updated 249 if upgrade.TotalTenant < upgrade.ReadyTenant { 250 panic(fmt.Sprintf("BUG: invalid upgrade tenant, upgrade %s, updated %d", upgrade.String(), updated)) 251 } 252 253 getUpgradeLogger().Info("upgrade tenant ready count changed", 254 zap.String("upgrade", upgrade.String())) 255 256 if upgrade.State == versions.StateReady { 257 return nil 258 } 259 return versions.UpdateVersionUpgradeTasks(upgrade, txn) 260 }, 261 opts) 262 if err != nil { 263 getUpgradeLogger().Error("tenant task handle failed", 264 zap.Error(err)) 265 return false, err 266 } 267 return hasUpgradeTenants, nil 268 } 269 270 timer := time.NewTimer(s.upgrade.checkUpgradeTenantDuration) 271 defer timer.Stop() 272 273 for { 274 select { 275 case <-ctx.Done(): 276 return 277 case <-timer.C: 278 if s.upgrade.finalVersionCompleted.Load() { 279 return 280 } 281 282 for { 283 if hasUpgradeTenants, err := fn(); err != nil || hasUpgradeTenants { 284 continue 285 } 286 break 287 } 288 timer.Reset(s.upgrade.checkUpgradeTenantDuration) 289 } 290 } 291 } 292 293 func fetchTenants( 294 batch int, 295 fn func([]int32) error, 296 txn executor.TxnExecutor) error { 297 last := int32(-1) 298 var ids []int32 299 for { 300 ids = ids[:0] 301 sql := fmt.Sprintf("select account_id from mo_account where account_id > %d order by account_id limit %d", 302 last, 303 batch) 304 res, err := txn.Exec(sql, executor.StatementOption{}) 305 if err != nil { 306 return err 307 } 308 n := 0 309 res.ReadRows(func(rows int, cols []*vector.Vector) bool { 310 for i := 0; i < rows; i++ { 311 last = vector.GetFixedAt[int32](cols[0], i) 312 ids = append(ids, last) 313 n++ 314 } 315 return true 316 }) 317 res.Close() 318 if n == 0 { 319 return nil 320 } 321 if err := fn(ids); err != nil { 322 return err 323 } 324 } 325 }