github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/db/checkpoint/replay.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package checkpoint 16 17 import ( 18 "context" 19 "fmt" 20 "github.com/matrixorigin/matrixone/pkg/fileservice" 21 "sort" 22 "sync" 23 "time" 24 25 "github.com/matrixorigin/matrixone/pkg/objectio" 26 27 "github.com/matrixorigin/matrixone/pkg/common/moerr" 28 "github.com/matrixorigin/matrixone/pkg/container/types" 29 "github.com/matrixorigin/matrixone/pkg/logutil" 30 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio" 31 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog" 32 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 33 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers" 34 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/logtail" 35 ) 36 37 const ( 38 PrefetchData uint16 = iota 39 PrefetchMetaIdx 40 ReadMetaIdx 41 ReadData 42 ) 43 44 func (r *runner) Replay(dataFactory catalog.DataFactory) ( 45 maxTs types.TS, 46 maxLSN uint64, 47 isLSNValid bool, 48 err error) { 49 defer func() { 50 if maxTs.IsEmpty() { 51 isLSNValid = true 52 } 53 }() 54 t0 := time.Now() 55 ctx := r.ctx 56 dirs, err := r.rt.Fs.ListDir(CheckpointDir) 57 if err != nil { 58 return 59 } 60 if len(dirs) == 0 { 61 return 62 } 63 metaFiles := make([]*MetaFile, 0) 64 var readDuration, applyDuration time.Duration 65 for i, dir := range dirs { 66 start, end := blockio.DecodeCheckpointMetadataFileName(dir.Name) 67 metaFiles = append(metaFiles, &MetaFile{ 68 start: start, 69 end: end, 70 index: i, 71 }) 72 } 73 sort.Slice(metaFiles, func(i, j int) bool { 74 return metaFiles[i].end.Less(&metaFiles[j].end) 75 }) 76 targetIdx := metaFiles[len(metaFiles)-1].index 77 dir := dirs[targetIdx] 78 reader, err := blockio.NewFileReader(r.rt.Fs.Service, CheckpointDir+dir.Name) 79 if err != nil { 80 return 81 } 82 bats, closeCB, err := reader.LoadAllColumns(ctx, nil, common.CheckpointAllocator) 83 if err != nil { 84 return 85 } 86 defer func() { 87 if closeCB != nil { 88 closeCB() 89 } 90 }() 91 bat := containers.NewBatch() 92 defer bat.Close() 93 colNames := CheckpointSchema.Attrs() 94 colTypes := CheckpointSchema.Types() 95 var checkpointVersion int 96 // in version 1, checkpoint metadata doesn't contain 'version'. 97 vecLen := len(bats[0].Vecs) 98 logutil.Infof("checkpoint version: %d, list and load duration: %v", vecLen, time.Since(t0)) 99 if vecLen < CheckpointSchemaColumnCountV1 { 100 checkpointVersion = 1 101 } else if vecLen < CheckpointSchemaColumnCountV2 { 102 checkpointVersion = 2 103 } else { 104 checkpointVersion = 3 105 } 106 for i := range bats[0].Vecs { 107 if len(bats) == 0 { 108 continue 109 } 110 var vec containers.Vector 111 if bats[0].Vecs[i].Length() == 0 { 112 vec = containers.MakeVector(colTypes[i], common.CheckpointAllocator) 113 } else { 114 vec = containers.ToTNVector(bats[0].Vecs[i], common.CheckpointAllocator) 115 } 116 bat.AddVector(colNames[i], vec) 117 } 118 readDuration += time.Since(t0) 119 datas := make([]*logtail.CheckpointData, bat.Length()) 120 121 entries, maxGlobalEnd := replayCheckpointEntries(bat, checkpointVersion) 122 emptyFile := make([]*CheckpointEntry, 0) 123 var emptyFileMu sync.RWMutex 124 closecbs := make([]func(), 0) 125 var readCount, applyCount, totalCount int 126 totalCount = len(entries) 127 readfn := func(i int, readType uint16) { 128 checkpointEntry := entries[i] 129 if checkpointEntry.end.Less(&maxGlobalEnd) { 130 return 131 } 132 var err2 error 133 if readType == PrefetchData { 134 if err2 = checkpointEntry.Prefetch(ctx, r.rt.Fs, datas[i]); err2 != nil { 135 logutil.Warnf("read %v failed: %v", checkpointEntry.String(), err2) 136 } 137 } else if readType == PrefetchMetaIdx { 138 readCount++ 139 datas[i], err = checkpointEntry.PrefetchMetaIdx(ctx, r.rt.Fs) 140 if err != nil { 141 return 142 } 143 } else if readType == ReadMetaIdx { 144 err = checkpointEntry.ReadMetaIdx(ctx, r.rt.Fs, datas[i]) 145 if err != nil { 146 return 147 } 148 } else { 149 if err2 = checkpointEntry.Read(ctx, r.rt.Fs, datas[i]); err2 != nil { 150 logutil.Warnf("read %v failed: %v", checkpointEntry.String(), err2) 151 emptyFileMu.Lock() 152 emptyFile = append(emptyFile, checkpointEntry) 153 emptyFileMu.Unlock() 154 } else { 155 entries[i] = checkpointEntry 156 closecbs = append(closecbs, func() { datas[i].CloseWhenLoadFromCache(checkpointEntry.version) }) 157 } 158 } 159 } 160 defer func() { 161 for _, cb := range closecbs { 162 cb() 163 } 164 }() 165 t0 = time.Now() 166 for i := 0; i < bat.Length(); i++ { 167 metaLoc := objectio.Location(bat.GetVectorByName(CheckpointAttr_MetaLocation).Get(i).([]byte)) 168 169 err = blockio.PrefetchMeta(r.rt.Fs.Service, metaLoc) 170 if err != nil { 171 return 172 } 173 } 174 for i := 0; i < bat.Length(); i++ { 175 readfn(i, PrefetchMetaIdx) 176 } 177 for i := 0; i < bat.Length(); i++ { 178 readfn(i, ReadMetaIdx) 179 } 180 for i := 0; i < bat.Length(); i++ { 181 readfn(i, PrefetchData) 182 } 183 for i := 0; i < bat.Length(); i++ { 184 readfn(i, ReadData) 185 } 186 readDuration += time.Since(t0) 187 if err != nil { 188 return 189 } 190 t0 = time.Now() 191 globalIdx := 0 192 for i := 0; i < bat.Length(); i++ { 193 checkpointEntry := entries[i] 194 if checkpointEntry == nil { 195 continue 196 } 197 if checkpointEntry.GetType() == ET_Global { 198 globalIdx = i 199 r.tryAddNewGlobalCheckpointEntry(checkpointEntry) 200 } else if checkpointEntry.GetType() == ET_Incremental { 201 r.tryAddNewIncrementalCheckpointEntry(checkpointEntry) 202 } else if checkpointEntry.GetType() == ET_Backup { 203 r.tryAddNewBackupCheckpointEntry(checkpointEntry) 204 } 205 } 206 207 var ckpVers []uint32 208 var ckpDatas []*logtail.CheckpointData 209 210 maxGlobal := r.MaxGlobalCheckpoint() 211 if maxGlobal != nil { 212 logutil.Infof("replay checkpoint %v", maxGlobal) 213 err = datas[globalIdx].ApplyReplayTo(r.catalog, dataFactory) 214 applyCount++ 215 if err != nil { 216 return 217 } 218 219 ckpVers = append(ckpVers, maxGlobal.version) 220 ckpDatas = append(ckpDatas, datas[globalIdx]) 221 222 if maxTs.Less(&maxGlobal.end) { 223 maxTs = maxGlobal.end 224 } 225 // for force checkpoint, ckpLSN is 0. 226 if maxGlobal.version >= logtail.CheckpointVersion7 && maxGlobal.ckpLSN > 0 { 227 if maxGlobal.ckpLSN < maxLSN { 228 panic(fmt.Sprintf("logic error, current lsn %d, incoming lsn %d", maxLSN, maxGlobal.ckpLSN)) 229 } 230 isLSNValid = true 231 maxLSN = maxGlobal.ckpLSN 232 } 233 } 234 for _, e := range emptyFile { 235 if e.end.GreaterEq(&maxTs) { 236 return types.TS{}, 0, false, 237 moerr.NewInternalError(ctx, 238 "read checkpoint %v failed", 239 e.String()) 240 } 241 } 242 for i := 0; i < bat.Length(); i++ { 243 checkpointEntry := entries[i] 244 if checkpointEntry == nil { 245 continue 246 } 247 if checkpointEntry.end.LessEq(&maxTs) { 248 continue 249 } 250 logutil.Infof("replay checkpoint %v", checkpointEntry) 251 err = datas[i].ApplyReplayTo(r.catalog, dataFactory) 252 applyCount++ 253 if err != nil { 254 return 255 } 256 257 ckpVers = append(ckpVers, checkpointEntry.version) 258 ckpDatas = append(ckpDatas, datas[i]) 259 260 if maxTs.Less(&checkpointEntry.end) { 261 maxTs = checkpointEntry.end 262 } 263 if checkpointEntry.version >= logtail.CheckpointVersion7 && checkpointEntry.ckpLSN != 0 { 264 if checkpointEntry.ckpLSN < maxLSN { 265 panic(fmt.Sprintf("logic error, current lsn %d, incoming lsn %d", maxLSN, checkpointEntry.ckpLSN)) 266 } 267 isLSNValid = true 268 maxLSN = checkpointEntry.ckpLSN 269 } 270 // For version 7, all ckp LSN of force ickp is 0. 271 // In db.ForceIncrementalCheckpoint,it truncates. 272 // If the last ckp is force ickp,LSN check should be disable. 273 if checkpointEntry.version == logtail.CheckpointVersion7 && checkpointEntry.ckpLSN == 0 { 274 isLSNValid = false 275 } 276 } 277 278 r.catalog.GetUsageMemo().(*logtail.TNUsageMemo).PrepareReplay(ckpDatas, ckpVers) 279 280 applyDuration = time.Since(t0) 281 logutil.Info("open-tae", common.OperationField("replay"), 282 common.OperandField("checkpoint"), 283 common.AnyField("apply cost", applyDuration), 284 common.AnyField("read cost", readDuration), 285 common.AnyField("total count", totalCount), 286 common.AnyField("read count", readCount), 287 common.AnyField("apply count", applyCount)) 288 r.source.Init(maxTs) 289 return 290 } 291 292 func MergeCkpMeta(ctx context.Context, fs fileservice.FileService, cnLocation, tnLocation objectio.Location, startTs, ts types.TS) (string, error) { 293 dirs, err := fs.List(ctx, CheckpointDir) 294 if err != nil { 295 return "", err 296 } 297 if len(dirs) == 0 { 298 return "", nil 299 } 300 metaFiles := make([]*MetaFile, 0) 301 for i, dir := range dirs { 302 start, end := blockio.DecodeCheckpointMetadataFileName(dir.Name) 303 metaFiles = append(metaFiles, &MetaFile{ 304 start: start, 305 end: end, 306 index: i, 307 }) 308 } 309 sort.Slice(metaFiles, func(i, j int) bool { 310 return metaFiles[i].end.Less(&metaFiles[j].end) 311 }) 312 targetIdx := metaFiles[len(metaFiles)-1].index 313 dir := dirs[targetIdx] 314 reader, err := blockio.NewFileReader(fs, CheckpointDir+dir.Name) 315 if err != nil { 316 return "", err 317 } 318 bats, closeCB, err := reader.LoadAllColumns(ctx, nil, common.CheckpointAllocator) 319 if err != nil { 320 return "", err 321 } 322 defer func() { 323 for i := range bats { 324 for j := range bats[i].Vecs { 325 bats[i].Vecs[j].Free(common.CheckpointAllocator) 326 } 327 } 328 if closeCB != nil { 329 closeCB() 330 } 331 }() 332 bat := containers.NewBatch() 333 defer bat.Close() 334 colNames := CheckpointSchema.Attrs() 335 colTypes := CheckpointSchema.Types() 336 for i := range bats[0].Vecs { 337 if len(bats) == 0 { 338 continue 339 } 340 var vec containers.Vector 341 if bats[0].Vecs[i].Length() == 0 { 342 vec = containers.MakeVector(colTypes[i], common.CheckpointAllocator) 343 } else { 344 vec = containers.ToTNVector(bats[0].Vecs[i], common.CheckpointAllocator) 345 } 346 bat.AddVector(colNames[i], vec) 347 } 348 last := bat.Vecs[0].Length() - 1 349 bat.GetVectorByName(CheckpointAttr_StartTS).Append(startTs, false) 350 bat.GetVectorByName(CheckpointAttr_EndTS).Append(ts, false) 351 bat.GetVectorByName(CheckpointAttr_MetaLocation).Append([]byte(cnLocation), false) 352 bat.GetVectorByName(CheckpointAttr_EntryType).Append(true, false) 353 bat.GetVectorByName(CheckpointAttr_Version).Append(bat.GetVectorByName(CheckpointAttr_Version).Get(last), false) 354 bat.GetVectorByName(CheckpointAttr_AllLocations).Append([]byte(tnLocation), false) 355 bat.GetVectorByName(CheckpointAttr_CheckpointLSN).Append(bat.GetVectorByName(CheckpointAttr_CheckpointLSN).Get(last), false) 356 bat.GetVectorByName(CheckpointAttr_TruncateLSN).Append(bat.GetVectorByName(CheckpointAttr_TruncateLSN).Get(last), false) 357 bat.GetVectorByName(CheckpointAttr_Type).Append(int8(ET_Backup), false) 358 name := blockio.EncodeCheckpointMetadataFileName(CheckpointDir, PrefixMetadata, startTs, ts) 359 writer, err := objectio.NewObjectWriterSpecial(objectio.WriterCheckpoint, name, fs) 360 if err != nil { 361 return "", err 362 } 363 if _, err = writer.Write(containers.ToCNBatch(bat)); err != nil { 364 return "", err 365 } 366 367 // TODO: checkpoint entry should maintain the location 368 _, err = writer.WriteEnd(ctx) 369 return name, err 370 } 371 372 func replayCheckpointEntries(bat *containers.Batch, checkpointVersion int) (entries []*CheckpointEntry, maxGlobalEnd types.TS) { 373 entries = make([]*CheckpointEntry, bat.Length()) 374 for i := 0; i < bat.Length(); i++ { 375 start := bat.GetVectorByName(CheckpointAttr_StartTS).Get(i).(types.TS) 376 end := bat.GetVectorByName(CheckpointAttr_EndTS).Get(i).(types.TS) 377 cnLoc := objectio.Location(bat.GetVectorByName(CheckpointAttr_MetaLocation).Get(i).([]byte)) 378 typ := ET_Global 379 if checkpointVersion > 2 { 380 typ = EntryType(bat.GetVectorByName(CheckpointAttr_Type).Get(i).(int8)) 381 } else { 382 isIncremental := bat.GetVectorByName(CheckpointAttr_EntryType).Get(i).(bool) 383 if isIncremental { 384 typ = ET_Incremental 385 } 386 } 387 var version uint32 388 if checkpointVersion == 1 { 389 version = logtail.CheckpointVersion1 390 } else { 391 version = bat.GetVectorByName(CheckpointAttr_Version).Get(i).(uint32) 392 } 393 var tnLoc objectio.Location 394 if version <= logtail.CheckpointVersion4 { 395 tnLoc = cnLoc 396 } else { 397 tnLoc = objectio.Location(bat.GetVectorByName(CheckpointAttr_AllLocations).Get(i).([]byte)) 398 } 399 var ckpLSN, truncateLSN uint64 400 if version >= logtail.CheckpointVersion7 { 401 ckpLSN = bat.GetVectorByName(CheckpointAttr_CheckpointLSN).Get(i).(uint64) 402 truncateLSN = bat.GetVectorByName(CheckpointAttr_TruncateLSN).Get(i).(uint64) 403 } 404 checkpointEntry := &CheckpointEntry{ 405 start: start, 406 end: end, 407 cnLocation: cnLoc, 408 tnLocation: tnLoc, 409 state: ST_Finished, 410 entryType: typ, 411 version: version, 412 ckpLSN: ckpLSN, 413 truncateLSN: truncateLSN, 414 } 415 entries[i] = checkpointEntry 416 if typ == ET_Global { 417 if end.Greater(&maxGlobalEnd) { 418 maxGlobalEnd = end 419 } 420 } 421 } 422 return 423 }