github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/db/gc/table.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gc
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    22  	"github.com/matrixorigin/matrixone/pkg/logutil"
    23  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio"
    24  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
    25  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    26  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    27  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/txn/txnbase"
    28  	"sync"
    29  
    30  	"github.com/matrixorigin/matrixone/pkg/container/types"
    31  	"github.com/matrixorigin/matrixone/pkg/objectio"
    32  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/logtail"
    33  )
    34  
    35  type ObjectEntry struct {
    36  	commitTS  types.TS
    37  	createTS  types.TS
    38  	dropTS    types.TS
    39  	db        uint64
    40  	table     uint64
    41  	fileIterm map[int][]uint32
    42  }
    43  
    44  // GCTable is a data structure in memory after consuming checkpoint
    45  type GCTable struct {
    46  	sync.Mutex
    47  	objects map[string]*ObjectEntry
    48  }
    49  
    50  func NewGCTable() *GCTable {
    51  	table := GCTable{
    52  		objects: make(map[string]*ObjectEntry),
    53  	}
    54  	return &table
    55  }
    56  
    57  func (t *GCTable) addObject(name string, objEntry *ObjectEntry, commitTS types.TS) {
    58  	t.Lock()
    59  	defer t.Unlock()
    60  	object := t.objects[name]
    61  	if object == nil {
    62  		t.objects[name] = objEntry
    63  		return
    64  	}
    65  	t.objects[name] = objEntry
    66  	if object.commitTS.Less(&commitTS) {
    67  		t.objects[name].commitTS = commitTS
    68  	}
    69  }
    70  
    71  func (t *GCTable) addObjectForSnapshot(name string, objEntry *ObjectEntry, commitTS types.TS, num int, row uint32) {
    72  	t.Lock()
    73  	defer t.Unlock()
    74  	object := t.objects[name]
    75  	if object == nil {
    76  		t.objects[name] = objEntry
    77  		objEntry.fileIterm = make(map[int][]uint32)
    78  		objEntry.fileIterm[num] = append(objEntry.fileIterm[num], row)
    79  		return
    80  	}
    81  	t.objects[name] = objEntry
    82  	if object.commitTS.Less(&commitTS) {
    83  		t.objects[name].commitTS = commitTS
    84  	}
    85  	if t.objects[name].fileIterm == nil {
    86  		objEntry.fileIterm = make(map[int][]uint32)
    87  	}
    88  	objEntry.fileIterm[num] = append(objEntry.fileIterm[num], row)
    89  }
    90  
    91  func (t *GCTable) deleteObject(name string) {
    92  	t.Lock()
    93  	defer t.Unlock()
    94  	delete(t.objects, name)
    95  }
    96  
    97  // Merge can merge two GCTables
    98  func (t *GCTable) Merge(GCTable *GCTable) {
    99  	for name, entry := range GCTable.objects {
   100  		t.addObject(name, entry, entry.commitTS)
   101  	}
   102  }
   103  
   104  func (t *GCTable) getObjects() map[string]*ObjectEntry {
   105  	t.Lock()
   106  	defer t.Unlock()
   107  	return t.objects
   108  }
   109  
   110  // SoftGC is to remove objectentry that can be deleted from GCTable
   111  func (t *GCTable) SoftGC(table *GCTable, ts types.TS, snapShotList map[uint32]containers.Vector, meta *logtail.SnapshotMeta) ([]string, map[uint32][]types.TS) {
   112  	gc := make([]string, 0)
   113  	snapList := make(map[uint32][]types.TS)
   114  	objects := t.getObjects()
   115  	for acct, snap := range snapShotList {
   116  		snapList[acct] = vector.MustFixedCol[types.TS](snap.GetDownstreamVector())
   117  	}
   118  	for name, entry := range objects {
   119  		objectEntry := table.objects[name]
   120  		tsList := meta.GetSnapshotList(snapList, entry.table)
   121  		if tsList == nil {
   122  			if objectEntry == nil && entry.commitTS.Less(&ts) {
   123  				gc = append(gc, name)
   124  				t.deleteObject(name)
   125  			}
   126  			continue
   127  		}
   128  		if objectEntry == nil && entry.commitTS.Less(&ts) && !isSnapshotRefers(entry, tsList, name) {
   129  			gc = append(gc, name)
   130  			t.deleteObject(name)
   131  		}
   132  	}
   133  	return gc, snapList
   134  }
   135  
   136  func isSnapshotRefers(obj *ObjectEntry, snapVec []types.TS, name string) bool {
   137  	if len(snapVec) == 0 {
   138  		return false
   139  	}
   140  	left, right := 0, len(snapVec)-1
   141  	for left <= right {
   142  		mid := left + (right-left)/2
   143  		snapTS := snapVec[mid]
   144  		if snapTS.GreaterEq(&obj.createTS) && (obj.dropTS.IsEmpty() || snapTS.Less(&obj.dropTS)) {
   145  			logutil.Infof("name: %v, isSnapshotRefers: %s, create %v, drop %v",
   146  				name, snapTS.ToString(), obj.createTS.ToString(), obj.dropTS.ToString())
   147  			return true
   148  		} else if snapTS.Less(&obj.createTS) {
   149  			left = mid + 1
   150  		} else {
   151  			right = mid - 1
   152  		}
   153  	}
   154  	return false
   155  }
   156  
   157  func (t *GCTable) UpdateTable(data *logtail.CheckpointData) {
   158  	ins := data.GetObjectBatchs()
   159  	insCommitTSVec := ins.GetVectorByName(txnbase.SnapshotAttr_CommitTS).GetDownstreamVector()
   160  	insDeleteTSVec := ins.GetVectorByName(catalog.EntryNode_DeleteAt).GetDownstreamVector()
   161  	insCreateTSVec := ins.GetVectorByName(catalog.EntryNode_CreateAt).GetDownstreamVector()
   162  	dbid := ins.GetVectorByName(catalog.SnapshotAttr_DBID).GetDownstreamVector()
   163  	tid := ins.GetVectorByName(catalog.SnapshotAttr_TID).GetDownstreamVector()
   164  
   165  	for i := 0; i < ins.Length(); i++ {
   166  		var objectStats objectio.ObjectStats
   167  		buf := ins.GetVectorByName(catalog.ObjectAttr_ObjectStats).Get(i).([]byte)
   168  		objectStats.UnMarshal(buf)
   169  		commitTS := vector.GetFixedAt[types.TS](insCommitTSVec, i)
   170  		deleteTS := vector.GetFixedAt[types.TS](insDeleteTSVec, i)
   171  		createTS := vector.GetFixedAt[types.TS](insCreateTSVec, i)
   172  		object := &ObjectEntry{
   173  			commitTS: commitTS,
   174  			createTS: createTS,
   175  			dropTS:   deleteTS,
   176  			db:       vector.GetFixedAt[uint64](dbid, i),
   177  			table:    vector.GetFixedAt[uint64](tid, i),
   178  		}
   179  		t.addObject(objectStats.ObjectName().String(), object, commitTS)
   180  	}
   181  }
   182  
   183  func (t *GCTable) UpdateTableForSnapshot(data *logtail.CheckpointData, num int) {
   184  	ins := data.GetObjectBatchs()
   185  	insCommitTSVec := ins.GetVectorByName(txnbase.SnapshotAttr_CommitTS).GetDownstreamVector()
   186  	insDeleteTSVec := ins.GetVectorByName(catalog.EntryNode_DeleteAt).GetDownstreamVector()
   187  	insCreateTSVec := ins.GetVectorByName(catalog.EntryNode_CreateAt).GetDownstreamVector()
   188  	dbid := ins.GetVectorByName(catalog.SnapshotAttr_DBID).GetDownstreamVector()
   189  	tid := ins.GetVectorByName(catalog.SnapshotAttr_TID).GetDownstreamVector()
   190  
   191  	for i := 0; i < ins.Length(); i++ {
   192  		var objectStats objectio.ObjectStats
   193  		buf := ins.GetVectorByName(catalog.ObjectAttr_ObjectStats).Get(i).([]byte)
   194  		objectStats.UnMarshal(buf)
   195  		commitTS := vector.GetFixedAt[types.TS](insCommitTSVec, i)
   196  		deleteTS := vector.GetFixedAt[types.TS](insDeleteTSVec, i)
   197  		createTS := vector.GetFixedAt[types.TS](insCreateTSVec, i)
   198  		object := &ObjectEntry{
   199  			commitTS: commitTS,
   200  			createTS: createTS,
   201  			dropTS:   deleteTS,
   202  			db:       vector.GetFixedAt[uint64](dbid, i),
   203  			table:    vector.GetFixedAt[uint64](tid, i),
   204  		}
   205  		t.addObjectForSnapshot(objectStats.ObjectName().String(), object, commitTS, num, uint32(i))
   206  
   207  	}
   208  }
   209  
   210  func (t *GCTable) makeBatchWithGCTable() []*containers.Batch {
   211  	bats := make([]*containers.Batch, 1)
   212  	bats[CreateBlock] = containers.NewBatch()
   213  	return bats
   214  }
   215  
   216  func (t *GCTable) makeBatchWithGCTableV1() []*containers.Batch {
   217  	bats := make([]*containers.Batch, 2)
   218  	bats[CreateBlock] = containers.NewBatch()
   219  	bats[DeleteBlock] = containers.NewBatch()
   220  	return bats
   221  }
   222  
   223  func (t *GCTable) closeBatch(bs []*containers.Batch) {
   224  	for i := range bs {
   225  		bs[i].Close()
   226  	}
   227  }
   228  
   229  // collectData collects data from memory that can be written to s3
   230  func (t *GCTable) collectData(files []string) []*containers.Batch {
   231  	bats := t.makeBatchWithGCTable()
   232  	for i, attr := range BlockSchemaAttr {
   233  		bats[CreateBlock].AddVector(attr, containers.MakeVector(BlockSchemaTypes[i], common.DefaultAllocator))
   234  	}
   235  	for name, entry := range t.objects {
   236  		bats[CreateBlock].GetVectorByName(GCAttrObjectName).Append([]byte(name), false)
   237  		bats[CreateBlock].GetVectorByName(GCCreateTS).Append(entry.createTS, false)
   238  		bats[CreateBlock].GetVectorByName(GCDeleteTS).Append(entry.dropTS, false)
   239  		bats[CreateBlock].GetVectorByName(GCAttrCommitTS).Append(entry.commitTS, false)
   240  		bats[CreateBlock].GetVectorByName(GCAttrTableId).Append(entry.table, false)
   241  	}
   242  	return bats
   243  }
   244  
   245  // SaveTable is to write data to s3
   246  func (t *GCTable) SaveTable(start, end types.TS, fs *objectio.ObjectFS, files []string) ([]objectio.BlockObject, error) {
   247  	bats := t.collectData(files)
   248  	defer t.closeBatch(bats)
   249  	name := blockio.EncodeCheckpointMetadataFileName(GCMetaDir, PrefixGCMeta, start, end)
   250  	writer, err := objectio.NewObjectWriterSpecial(objectio.WriterGC, name, fs.Service)
   251  	if err != nil {
   252  		return nil, err
   253  	}
   254  	for i := range bats {
   255  		if _, err := writer.WriteWithoutSeqnum(containers.ToCNBatch(bats[i])); err != nil {
   256  			return nil, err
   257  		}
   258  	}
   259  
   260  	blocks, err := writer.WriteEnd(context.Background())
   261  	return blocks, err
   262  }
   263  
   264  // SaveFullTable is to write data to s3
   265  func (t *GCTable) SaveFullTable(start, end types.TS, fs *objectio.ObjectFS, files []string) ([]objectio.BlockObject, error) {
   266  	bats := t.collectData(files)
   267  	defer t.closeBatch(bats)
   268  	name := blockio.EncodeGCMetadataFileName(GCMetaDir, PrefixGCMeta, start, end)
   269  	writer, err := objectio.NewObjectWriterSpecial(objectio.WriterGC, name, fs.Service)
   270  	if err != nil {
   271  		return nil, err
   272  	}
   273  	for i := range bats {
   274  		if _, err := writer.WriteWithoutSeqnum(containers.ToCNBatch(bats[i])); err != nil {
   275  			return nil, err
   276  		}
   277  	}
   278  
   279  	blocks, err := writer.WriteEnd(context.Background())
   280  	return blocks, err
   281  }
   282  
   283  func (t *GCTable) rebuildTableV2(bats []*containers.Batch) {
   284  	for i := 0; i < bats[CreateBlock].Length(); i++ {
   285  		name := string(bats[CreateBlock].GetVectorByName(GCAttrObjectName).Get(i).([]byte))
   286  		creatTS := bats[CreateBlock].GetVectorByName(GCCreateTS).Get(i).(types.TS)
   287  		deleteTS := bats[CreateBlock].GetVectorByName(GCDeleteTS).Get(i).(types.TS)
   288  		commitTS := bats[CreateBlock].GetVectorByName(GCAttrCommitTS).Get(i).(types.TS)
   289  		tid := bats[CreateBlock].GetVectorByName(GCAttrTableId).Get(i).(uint64)
   290  		if t.objects[name] != nil {
   291  			continue
   292  		}
   293  		object := &ObjectEntry{
   294  			createTS: creatTS,
   295  			dropTS:   deleteTS,
   296  			commitTS: commitTS,
   297  			table:    tid,
   298  		}
   299  		t.addObject(name, object, commitTS)
   300  	}
   301  }
   302  
   303  func (t *GCTable) rebuildTable(bats []*containers.Batch, ts types.TS) {
   304  	for i := 0; i < bats[CreateBlock].Length(); i++ {
   305  		name := string(bats[CreateBlock].GetVectorByName(GCAttrObjectName).Get(i).([]byte))
   306  		if t.objects[name] != nil {
   307  			continue
   308  		}
   309  		object := &ObjectEntry{
   310  			createTS: ts,
   311  			commitTS: ts,
   312  		}
   313  		t.addObject(name, object, ts)
   314  	}
   315  	for i := 0; i < bats[DeleteBlock].Length(); i++ {
   316  		name := string(bats[DeleteBlock].GetVectorByName(GCAttrObjectName).Get(i).([]byte))
   317  		if t.objects[name] == nil {
   318  			logutil.Fatalf("delete object should not be nil")
   319  		}
   320  		object := &ObjectEntry{
   321  			dropTS:   ts,
   322  			commitTS: ts,
   323  		}
   324  		t.addObject(name, object, ts)
   325  	}
   326  }
   327  
   328  func (t *GCTable) replayData(ctx context.Context,
   329  	typ BatchType,
   330  	attrs []string,
   331  	types []types.Type,
   332  	bats []*containers.Batch,
   333  	bs []objectio.BlockObject,
   334  	reader *blockio.BlockReader) (func(), error) {
   335  	idxes := make([]uint16, len(attrs))
   336  	for i := range attrs {
   337  		idxes[i] = uint16(i)
   338  	}
   339  	mobat, release, err := reader.LoadColumns(ctx, idxes, nil, bs[typ].GetID(), common.DefaultAllocator)
   340  	if err != nil {
   341  		return nil, err
   342  	}
   343  	for i := range attrs {
   344  		pkgVec := mobat.Vecs[i]
   345  		var vec containers.Vector
   346  		if pkgVec.Length() == 0 {
   347  			vec = containers.MakeVector(types[i], common.DefaultAllocator)
   348  		} else {
   349  			vec = containers.ToTNVector(pkgVec, common.DefaultAllocator)
   350  		}
   351  		bats[typ].AddVector(attrs[i], vec)
   352  	}
   353  	return release, nil
   354  }
   355  
   356  // ReadTable reads an s3 file and replays a GCTable in memory
   357  func (t *GCTable) ReadTable(ctx context.Context, name string, size int64, fs *objectio.ObjectFS, ts types.TS) error {
   358  	var release, releaseCreateBlock, releaseDeleteBlock func()
   359  	defer func() {
   360  		if release != nil {
   361  			release()
   362  		}
   363  		if releaseCreateBlock != nil {
   364  			releaseCreateBlock()
   365  		}
   366  		if releaseDeleteBlock != nil {
   367  			releaseDeleteBlock()
   368  		}
   369  	}()
   370  	reader, err := blockio.NewFileReaderNoCache(fs.Service, name)
   371  	if err != nil {
   372  		return err
   373  	}
   374  	bs, err := reader.LoadAllBlocks(ctx, common.DefaultAllocator)
   375  	if err != nil {
   376  		return err
   377  	}
   378  	if len(bs) == 1 {
   379  		bats := t.makeBatchWithGCTable()
   380  		defer t.closeBatch(bats)
   381  		release, err = t.replayData(ctx, CreateBlock, BlockSchemaAttr, BlockSchemaTypes, bats, bs, reader)
   382  		if err != nil {
   383  			return err
   384  		}
   385  		t.rebuildTableV2(bats)
   386  		return nil
   387  	}
   388  	bats := t.makeBatchWithGCTableV1()
   389  	defer t.closeBatch(bats)
   390  	releaseCreateBlock, err = t.replayData(ctx, CreateBlock, BlockSchemaAttrV1, BlockSchemaTypesV1, bats, bs, reader)
   391  	if err != nil {
   392  		return err
   393  	}
   394  	releaseDeleteBlock, err = t.replayData(ctx, DeleteBlock, BlockSchemaAttrV1, BlockSchemaTypesV1, bats, bs, reader)
   395  	if err != nil {
   396  		return err
   397  	}
   398  	t.rebuildTable(bats, ts)
   399  	return nil
   400  }
   401  
   402  // For test
   403  func (t *GCTable) Compare(table *GCTable) bool {
   404  	for name, entry := range table.objects {
   405  		object := t.objects[name]
   406  		if object == nil {
   407  			logutil.Infof("object %s is nil, create %v, drop %v", name, entry.createTS.ToString(), entry.dropTS.ToString())
   408  			return false
   409  		}
   410  		if !entry.commitTS.Equal(&object.commitTS) {
   411  			logutil.Infof("object %s commitTS is not equal", name)
   412  			return false
   413  		}
   414  	}
   415  
   416  	return len(t.objects) == len(table.objects)
   417  }
   418  
   419  func (t *GCTable) String() string {
   420  	if len(t.objects) == 0 {
   421  		return ""
   422  	}
   423  	var w bytes.Buffer
   424  	_, _ = w.WriteString("objects:[\n")
   425  	for name, entry := range t.objects {
   426  		_, _ = w.WriteString(fmt.Sprintf("name: %s, commitTS: %v ", name, entry.commitTS.ToString()))
   427  	}
   428  	_, _ = w.WriteString("]\n")
   429  	return w.String()
   430  }