github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/db/checkpoint/replay.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package checkpoint
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    21  	"sort"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/matrixorigin/matrixone/pkg/objectio"
    26  
    27  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    28  	"github.com/matrixorigin/matrixone/pkg/container/types"
    29  	"github.com/matrixorigin/matrixone/pkg/logutil"
    30  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio"
    31  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
    32  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    33  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    34  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/logtail"
    35  )
    36  
// Stage identifiers passed to the replay read function. Replay drives each
// checkpoint entry through these stages in the order: PrefetchMetaIdx,
// ReadMetaIdx, PrefetchData, ReadData (see the phased loops in Replay).
const (
	PrefetchData uint16 = iota // prefetch the checkpoint data blocks
	PrefetchMetaIdx            // prefetch the meta-index of a checkpoint
	ReadMetaIdx                // read (materialize) the meta-index
	ReadData                   // read the checkpoint data itself
)
    43  
// Replay rebuilds the checkpoint runner's state from persisted checkpoint
// metadata and applies the checkpoint data to the catalog.
//
// It locates the newest metadata file under CheckpointDir (largest end TS),
// decodes its rows into CheckpointEntry values, reads each entry's data in
// phases (prefetch meta-idx, read meta-idx, prefetch data, read data), then
// applies the max global checkpoint first and every newer incremental
// checkpoint after it.
//
// Returns:
//   - maxTs: the max end timestamp among applied checkpoints.
//   - maxLSN: the max checkpoint LSN seen (version >= 7 entries only).
//   - isLSNValid: whether maxLSN can be trusted for WAL truncation checks;
//     forced to true when nothing was replayed (maxTs empty), and disabled
//     again if the last applied v7 entry is a force checkpoint (ckpLSN == 0).
//   - err: first fatal error; prefetch/read-data failures are tolerated and
//     only logged (see err2 below), while meta-idx failures abort.
func (r *runner) Replay(dataFactory catalog.DataFactory) (
	maxTs types.TS,
	maxLSN uint64,
	isLSNValid bool,
	err error) {
	defer func() {
		// Nothing replayed means there is no LSN constraint to honor.
		if maxTs.IsEmpty() {
			isLSNValid = true
		}
	}()
	t0 := time.Now()
	ctx := r.ctx
	dirs, err := r.rt.Fs.ListDir(CheckpointDir)
	if err != nil {
		return
	}
	if len(dirs) == 0 {
		// No checkpoint has ever been written; fresh start.
		return
	}
	metaFiles := make([]*MetaFile, 0)
	var readDuration, applyDuration time.Duration
	// Each metadata file name encodes the [start, end] TS range it covers.
	for i, dir := range dirs {
		start, end := blockio.DecodeCheckpointMetadataFileName(dir.Name)
		metaFiles = append(metaFiles, &MetaFile{
			start: start,
			end:   end,
			index: i,
		})
	}
	sort.Slice(metaFiles, func(i, j int) bool {
		return metaFiles[i].end.Less(&metaFiles[j].end)
	})
	// Only the newest metadata file (largest end TS) is loaded; it lists
	// all checkpoint entries needed for replay.
	targetIdx := metaFiles[len(metaFiles)-1].index
	dir := dirs[targetIdx]
	reader, err := blockio.NewFileReader(r.rt.Fs.Service, CheckpointDir+dir.Name)
	if err != nil {
		return
	}
	bats, closeCB, err := reader.LoadAllColumns(ctx, nil, common.CheckpointAllocator)
	if err != nil {
		return
	}
	defer func() {
		if closeCB != nil {
			closeCB()
		}
	}()
	bat := containers.NewBatch()
	defer bat.Close()
	colNames := CheckpointSchema.Attrs()
	colTypes := CheckpointSchema.Types()
	var checkpointVersion int
	// in version 1, checkpoint metadata doesn't contain 'version'.
	// The metadata-schema version is inferred from the column count.
	vecLen := len(bats[0].Vecs)
	logutil.Infof("checkpoint version: %d, list and load duration: %v", vecLen, time.Since(t0))
	if vecLen < CheckpointSchemaColumnCountV1 {
		checkpointVersion = 1
	} else if vecLen < CheckpointSchemaColumnCountV2 {
		checkpointVersion = 2
	} else {
		checkpointVersion = 3
	}
	// Wrap the raw CN vectors into TN vectors keyed by schema attr name.
	for i := range bats[0].Vecs {
		// NOTE(review): this check is dead — if bats were empty, bats[0]
		// above would already have panicked before the loop was entered.
		if len(bats) == 0 {
			continue
		}
		var vec containers.Vector
		if bats[0].Vecs[i].Length() == 0 {
			vec = containers.MakeVector(colTypes[i], common.CheckpointAllocator)
		} else {
			vec = containers.ToTNVector(bats[0].Vecs[i], common.CheckpointAllocator)
		}
		bat.AddVector(colNames[i], vec)
	}
	readDuration += time.Since(t0)
	// datas[i] holds the loaded CheckpointData for entries[i].
	datas := make([]*logtail.CheckpointData, bat.Length())

	entries, maxGlobalEnd := replayCheckpointEntries(bat, checkpointVersion)
	// Entries whose data files could not be read; fatal only if one of them
	// is newer than everything successfully applied (checked below).
	emptyFile := make([]*CheckpointEntry, 0)
	var emptyFileMu sync.RWMutex
	closecbs := make([]func(), 0)
	var readCount, applyCount, totalCount int
	totalCount = len(entries)
	// readfn runs one stage (see the stage constants) for entry i.
	// Stage errors split in two classes:
	//   - PrefetchData/ReadData failures go to err2 and are only logged
	//     (ReadData failures are additionally recorded in emptyFile);
	//   - meta-idx failures are written to the named return err and abort.
	// NOTE(review): readfn is only invoked from plain sequential loops
	// below, so emptyFileMu appears to guard against concurrency that does
	// not exist here — presumably defensive; confirm before relying on it.
	readfn := func(i int, readType uint16) {
		checkpointEntry := entries[i]
		// Entries fully covered by the max global checkpoint are skipped.
		if checkpointEntry.end.Less(&maxGlobalEnd) {
			return
		}
		var err2 error
		if readType == PrefetchData {
			if err2 = checkpointEntry.Prefetch(ctx, r.rt.Fs, datas[i]); err2 != nil {
				logutil.Warnf("read %v failed: %v", checkpointEntry.String(), err2)
			}
		} else if readType == PrefetchMetaIdx {
			readCount++
			datas[i], err = checkpointEntry.PrefetchMetaIdx(ctx, r.rt.Fs)
			if err != nil {
				return
			}
		} else if readType == ReadMetaIdx {
			err = checkpointEntry.ReadMetaIdx(ctx, r.rt.Fs, datas[i])
			if err != nil {
				return
			}
		} else {
			if err2 = checkpointEntry.Read(ctx, r.rt.Fs, datas[i]); err2 != nil {
				logutil.Warnf("read %v failed: %v", checkpointEntry.String(), err2)
				emptyFileMu.Lock()
				emptyFile = append(emptyFile, checkpointEntry)
				emptyFileMu.Unlock()
			} else {
				// NOTE(review): this store is a no-op — checkpointEntry was
				// read from entries[i] and never reassigned.
				entries[i] = checkpointEntry
				closecbs = append(closecbs, func() { datas[i].CloseWhenLoadFromCache(checkpointEntry.version) })
			}
		}
	}
	// Release cached checkpoint data for every successfully read entry once
	// replay finishes (successful or not).
	defer func() {
		for _, cb := range closecbs {
			cb()
		}
	}()
	t0 = time.Now()
	// Warm the object metadata cache for every entry's meta location first.
	for i := 0; i < bat.Length(); i++ {
		metaLoc := objectio.Location(bat.GetVectorByName(CheckpointAttr_MetaLocation).Get(i).([]byte))

		err = blockio.PrefetchMeta(r.rt.Fs.Service, metaLoc)
		if err != nil {
			return
		}
	}
	// Phased reads: issue all prefetches for a stage before the blocking
	// reads of that stage, so I/O overlaps across entries.
	for i := 0; i < bat.Length(); i++ {
		readfn(i, PrefetchMetaIdx)
	}
	for i := 0; i < bat.Length(); i++ {
		readfn(i, ReadMetaIdx)
	}
	for i := 0; i < bat.Length(); i++ {
		readfn(i, PrefetchData)
	}
	for i := 0; i < bat.Length(); i++ {
		readfn(i, ReadData)
	}
	readDuration += time.Since(t0)
	// err may have been set inside readfn (meta-idx stages); check it now.
	if err != nil {
		return
	}
	t0 = time.Now()
	// Register every decoded entry with the runner, remembering the index
	// of the (last) global entry so its data can be applied first.
	globalIdx := 0
	for i := 0; i < bat.Length(); i++ {
		checkpointEntry := entries[i]
		if checkpointEntry == nil {
			continue
		}
		if checkpointEntry.GetType() == ET_Global {
			globalIdx = i
			r.tryAddNewGlobalCheckpointEntry(checkpointEntry)
		} else if checkpointEntry.GetType() == ET_Incremental {
			r.tryAddNewIncrementalCheckpointEntry(checkpointEntry)
		} else if checkpointEntry.GetType() == ET_Backup {
			r.tryAddNewBackupCheckpointEntry(checkpointEntry)
		}
	}

	// Versions/datas of applied checkpoints, handed to the usage memo below.
	var ckpVers []uint32
	var ckpDatas []*logtail.CheckpointData

	// Apply the max global checkpoint first: it subsumes all older entries.
	maxGlobal := r.MaxGlobalCheckpoint()
	if maxGlobal != nil {
		logutil.Infof("replay checkpoint %v", maxGlobal)
		err = datas[globalIdx].ApplyReplayTo(r.catalog, dataFactory)
		applyCount++
		if err != nil {
			return
		}

		ckpVers = append(ckpVers, maxGlobal.version)
		ckpDatas = append(ckpDatas, datas[globalIdx])

		if maxTs.Less(&maxGlobal.end) {
			maxTs = maxGlobal.end
		}
		// for force checkpoint, ckpLSN is 0.
		if maxGlobal.version >= logtail.CheckpointVersion7 && maxGlobal.ckpLSN > 0 {
			if maxGlobal.ckpLSN < maxLSN {
				panic(fmt.Sprintf("logic error, current lsn %d, incoming lsn %d", maxLSN, maxGlobal.ckpLSN))
			}
			isLSNValid = true
			maxLSN = maxGlobal.ckpLSN
		}
	}
	// An unreadable entry is fatal only if it is at least as new as what the
	// global checkpoint already covers — otherwise its data is redundant.
	for _, e := range emptyFile {
		if e.end.GreaterEq(&maxTs) {
			return types.TS{}, 0, false,
				moerr.NewInternalError(ctx,
					"read checkpoint %v failed",
					e.String())
		}
	}
	// Apply the remaining (incremental/backup) entries newer than maxTs,
	// in batch order, advancing maxTs/maxLSN as we go.
	for i := 0; i < bat.Length(); i++ {
		checkpointEntry := entries[i]
		if checkpointEntry == nil {
			continue
		}
		if checkpointEntry.end.LessEq(&maxTs) {
			continue
		}
		logutil.Infof("replay checkpoint %v", checkpointEntry)
		err = datas[i].ApplyReplayTo(r.catalog, dataFactory)
		applyCount++
		if err != nil {
			return
		}

		ckpVers = append(ckpVers, checkpointEntry.version)
		ckpDatas = append(ckpDatas, datas[i])

		if maxTs.Less(&checkpointEntry.end) {
			maxTs = checkpointEntry.end
		}
		if checkpointEntry.version >= logtail.CheckpointVersion7 && checkpointEntry.ckpLSN != 0 {
			if checkpointEntry.ckpLSN < maxLSN {
				panic(fmt.Sprintf("logic error, current lsn %d, incoming lsn %d", maxLSN, checkpointEntry.ckpLSN))
			}
			isLSNValid = true
			maxLSN = checkpointEntry.ckpLSN
		}
		// For version 7, all ckp LSN of force ickp is 0.
		// In db.ForceIncrementalCheckpoint,it truncates.
		// If the last ckp is force ickp,LSN check should be disable.
		if checkpointEntry.version == logtail.CheckpointVersion7 && checkpointEntry.ckpLSN == 0 {
			isLSNValid = false
		}
	}

	// Seed the storage-usage memo with everything that was applied.
	r.catalog.GetUsageMemo().(*logtail.TNUsageMemo).PrepareReplay(ckpDatas, ckpVers)

	applyDuration = time.Since(t0)
	logutil.Info("open-tae", common.OperationField("replay"),
		common.OperandField("checkpoint"),
		common.AnyField("apply cost", applyDuration),
		common.AnyField("read cost", readDuration),
		common.AnyField("total count", totalCount),
		common.AnyField("read count", readCount),
		common.AnyField("apply count", applyCount))
	r.source.Init(maxTs)
	return
}
   291  
   292  func MergeCkpMeta(ctx context.Context, fs fileservice.FileService, cnLocation, tnLocation objectio.Location, startTs, ts types.TS) (string, error) {
   293  	dirs, err := fs.List(ctx, CheckpointDir)
   294  	if err != nil {
   295  		return "", err
   296  	}
   297  	if len(dirs) == 0 {
   298  		return "", nil
   299  	}
   300  	metaFiles := make([]*MetaFile, 0)
   301  	for i, dir := range dirs {
   302  		start, end := blockio.DecodeCheckpointMetadataFileName(dir.Name)
   303  		metaFiles = append(metaFiles, &MetaFile{
   304  			start: start,
   305  			end:   end,
   306  			index: i,
   307  		})
   308  	}
   309  	sort.Slice(metaFiles, func(i, j int) bool {
   310  		return metaFiles[i].end.Less(&metaFiles[j].end)
   311  	})
   312  	targetIdx := metaFiles[len(metaFiles)-1].index
   313  	dir := dirs[targetIdx]
   314  	reader, err := blockio.NewFileReader(fs, CheckpointDir+dir.Name)
   315  	if err != nil {
   316  		return "", err
   317  	}
   318  	bats, closeCB, err := reader.LoadAllColumns(ctx, nil, common.CheckpointAllocator)
   319  	if err != nil {
   320  		return "", err
   321  	}
   322  	defer func() {
   323  		for i := range bats {
   324  			for j := range bats[i].Vecs {
   325  				bats[i].Vecs[j].Free(common.CheckpointAllocator)
   326  			}
   327  		}
   328  		if closeCB != nil {
   329  			closeCB()
   330  		}
   331  	}()
   332  	bat := containers.NewBatch()
   333  	defer bat.Close()
   334  	colNames := CheckpointSchema.Attrs()
   335  	colTypes := CheckpointSchema.Types()
   336  	for i := range bats[0].Vecs {
   337  		if len(bats) == 0 {
   338  			continue
   339  		}
   340  		var vec containers.Vector
   341  		if bats[0].Vecs[i].Length() == 0 {
   342  			vec = containers.MakeVector(colTypes[i], common.CheckpointAllocator)
   343  		} else {
   344  			vec = containers.ToTNVector(bats[0].Vecs[i], common.CheckpointAllocator)
   345  		}
   346  		bat.AddVector(colNames[i], vec)
   347  	}
   348  	last := bat.Vecs[0].Length() - 1
   349  	bat.GetVectorByName(CheckpointAttr_StartTS).Append(startTs, false)
   350  	bat.GetVectorByName(CheckpointAttr_EndTS).Append(ts, false)
   351  	bat.GetVectorByName(CheckpointAttr_MetaLocation).Append([]byte(cnLocation), false)
   352  	bat.GetVectorByName(CheckpointAttr_EntryType).Append(true, false)
   353  	bat.GetVectorByName(CheckpointAttr_Version).Append(bat.GetVectorByName(CheckpointAttr_Version).Get(last), false)
   354  	bat.GetVectorByName(CheckpointAttr_AllLocations).Append([]byte(tnLocation), false)
   355  	bat.GetVectorByName(CheckpointAttr_CheckpointLSN).Append(bat.GetVectorByName(CheckpointAttr_CheckpointLSN).Get(last), false)
   356  	bat.GetVectorByName(CheckpointAttr_TruncateLSN).Append(bat.GetVectorByName(CheckpointAttr_TruncateLSN).Get(last), false)
   357  	bat.GetVectorByName(CheckpointAttr_Type).Append(int8(ET_Backup), false)
   358  	name := blockio.EncodeCheckpointMetadataFileName(CheckpointDir, PrefixMetadata, startTs, ts)
   359  	writer, err := objectio.NewObjectWriterSpecial(objectio.WriterCheckpoint, name, fs)
   360  	if err != nil {
   361  		return "", err
   362  	}
   363  	if _, err = writer.Write(containers.ToCNBatch(bat)); err != nil {
   364  		return "", err
   365  	}
   366  
   367  	// TODO: checkpoint entry should maintain the location
   368  	_, err = writer.WriteEnd(ctx)
   369  	return name, err
   370  }
   371  
   372  func replayCheckpointEntries(bat *containers.Batch, checkpointVersion int) (entries []*CheckpointEntry, maxGlobalEnd types.TS) {
   373  	entries = make([]*CheckpointEntry, bat.Length())
   374  	for i := 0; i < bat.Length(); i++ {
   375  		start := bat.GetVectorByName(CheckpointAttr_StartTS).Get(i).(types.TS)
   376  		end := bat.GetVectorByName(CheckpointAttr_EndTS).Get(i).(types.TS)
   377  		cnLoc := objectio.Location(bat.GetVectorByName(CheckpointAttr_MetaLocation).Get(i).([]byte))
   378  		typ := ET_Global
   379  		if checkpointVersion > 2 {
   380  			typ = EntryType(bat.GetVectorByName(CheckpointAttr_Type).Get(i).(int8))
   381  		} else {
   382  			isIncremental := bat.GetVectorByName(CheckpointAttr_EntryType).Get(i).(bool)
   383  			if isIncremental {
   384  				typ = ET_Incremental
   385  			}
   386  		}
   387  		var version uint32
   388  		if checkpointVersion == 1 {
   389  			version = logtail.CheckpointVersion1
   390  		} else {
   391  			version = bat.GetVectorByName(CheckpointAttr_Version).Get(i).(uint32)
   392  		}
   393  		var tnLoc objectio.Location
   394  		if version <= logtail.CheckpointVersion4 {
   395  			tnLoc = cnLoc
   396  		} else {
   397  			tnLoc = objectio.Location(bat.GetVectorByName(CheckpointAttr_AllLocations).Get(i).([]byte))
   398  		}
   399  		var ckpLSN, truncateLSN uint64
   400  		if version >= logtail.CheckpointVersion7 {
   401  			ckpLSN = bat.GetVectorByName(CheckpointAttr_CheckpointLSN).Get(i).(uint64)
   402  			truncateLSN = bat.GetVectorByName(CheckpointAttr_TruncateLSN).Get(i).(uint64)
   403  		}
   404  		checkpointEntry := &CheckpointEntry{
   405  			start:       start,
   406  			end:         end,
   407  			cnLocation:  cnLoc,
   408  			tnLocation:  tnLoc,
   409  			state:       ST_Finished,
   410  			entryType:   typ,
   411  			version:     version,
   412  			ckpLSN:      ckpLSN,
   413  			truncateLSN: truncateLSN,
   414  		}
   415  		entries[i] = checkpointEntry
   416  		if typ == ET_Global {
   417  			if end.Greater(&maxGlobalEnd) {
   418  				maxGlobalEnd = end
   419  			}
   420  		}
   421  	}
   422  	return
   423  }