github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/db/gc/table.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gc 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "github.com/matrixorigin/matrixone/pkg/container/vector" 22 "github.com/matrixorigin/matrixone/pkg/logutil" 23 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/blockio" 24 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog" 25 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 26 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers" 27 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/txn/txnbase" 28 "sync" 29 30 "github.com/matrixorigin/matrixone/pkg/container/types" 31 "github.com/matrixorigin/matrixone/pkg/objectio" 32 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/logtail" 33 ) 34 35 type ObjectEntry struct { 36 commitTS types.TS 37 createTS types.TS 38 dropTS types.TS 39 db uint64 40 table uint64 41 fileIterm map[int][]uint32 42 } 43 44 // GCTable is a data structure in memory after consuming checkpoint 45 type GCTable struct { 46 sync.Mutex 47 objects map[string]*ObjectEntry 48 } 49 50 func NewGCTable() *GCTable { 51 table := GCTable{ 52 objects: make(map[string]*ObjectEntry), 53 } 54 return &table 55 } 56 57 func (t *GCTable) addObject(name string, objEntry *ObjectEntry, commitTS types.TS) { 58 t.Lock() 59 defer t.Unlock() 60 object := t.objects[name] 61 if object == nil { 62 t.objects[name] = objEntry 63 return 64 } 65 t.objects[name] = objEntry 66 if object.commitTS.Less(&commitTS) { 67 t.objects[name].commitTS = commitTS 68 } 69 } 70 71 func (t *GCTable) addObjectForSnapshot(name string, objEntry *ObjectEntry, commitTS types.TS, num int, row uint32) { 72 t.Lock() 73 defer t.Unlock() 74 object := t.objects[name] 75 if object == nil { 76 t.objects[name] = objEntry 77 objEntry.fileIterm = make(map[int][]uint32) 78 objEntry.fileIterm[num] = append(objEntry.fileIterm[num], row) 79 return 80 } 81 t.objects[name] = objEntry 82 if object.commitTS.Less(&commitTS) { 83 t.objects[name].commitTS = commitTS 84 } 85 if t.objects[name].fileIterm == nil { 86 objEntry.fileIterm = make(map[int][]uint32) 87 } 88 objEntry.fileIterm[num] = append(objEntry.fileIterm[num], row) 89 } 90 91 func (t *GCTable) deleteObject(name string) { 92 t.Lock() 93 defer t.Unlock() 94 delete(t.objects, name) 95 } 96 97 // Merge can merge two GCTables 98 func (t *GCTable) Merge(GCTable *GCTable) { 99 for name, entry := range GCTable.objects { 100 t.addObject(name, entry, entry.commitTS) 101 } 102 } 103 104 func (t *GCTable) getObjects() map[string]*ObjectEntry { 105 t.Lock() 106 defer t.Unlock() 107 return t.objects 108 } 109 110 // SoftGC is to remove objectentry that can be deleted from GCTable 111 func (t *GCTable) SoftGC(table *GCTable, ts types.TS, snapShotList map[uint32]containers.Vector, meta *logtail.SnapshotMeta) ([]string, map[uint32][]types.TS) { 112 gc := make([]string, 0) 113 snapList := make(map[uint32][]types.TS) 114 objects := t.getObjects() 115 for acct, snap := range snapShotList { 116 snapList[acct] = vector.MustFixedCol[types.TS](snap.GetDownstreamVector()) 117 } 118 for name, entry := range objects { 119 objectEntry := table.objects[name] 120 tsList := meta.GetSnapshotList(snapList, entry.table) 121 if tsList == nil { 122 if objectEntry == nil && entry.commitTS.Less(&ts) { 123 gc = append(gc, name) 124 t.deleteObject(name) 125 } 126 continue 127 } 128 if objectEntry == nil && entry.commitTS.Less(&ts) && !isSnapshotRefers(entry, tsList, name) { 129 gc = append(gc, name) 130 t.deleteObject(name) 131 } 132 } 133 return gc, snapList 134 } 135 136 func isSnapshotRefers(obj *ObjectEntry, snapVec []types.TS, name string) bool { 137 if len(snapVec) == 0 { 138 return false 139 } 140 left, right := 0, len(snapVec)-1 141 for left <= right { 142 mid := left + (right-left)/2 143 snapTS := snapVec[mid] 144 if snapTS.GreaterEq(&obj.createTS) && (obj.dropTS.IsEmpty() || snapTS.Less(&obj.dropTS)) { 145 logutil.Infof("name: %v, isSnapshotRefers: %s, create %v, drop %v", 146 name, snapTS.ToString(), obj.createTS.ToString(), obj.dropTS.ToString()) 147 return true 148 } else if snapTS.Less(&obj.createTS) { 149 left = mid + 1 150 } else { 151 right = mid - 1 152 } 153 } 154 return false 155 } 156 157 func (t *GCTable) UpdateTable(data *logtail.CheckpointData) { 158 ins := data.GetObjectBatchs() 159 insCommitTSVec := ins.GetVectorByName(txnbase.SnapshotAttr_CommitTS).GetDownstreamVector() 160 insDeleteTSVec := ins.GetVectorByName(catalog.EntryNode_DeleteAt).GetDownstreamVector() 161 insCreateTSVec := ins.GetVectorByName(catalog.EntryNode_CreateAt).GetDownstreamVector() 162 dbid := ins.GetVectorByName(catalog.SnapshotAttr_DBID).GetDownstreamVector() 163 tid := ins.GetVectorByName(catalog.SnapshotAttr_TID).GetDownstreamVector() 164 165 for i := 0; i < ins.Length(); i++ { 166 var objectStats objectio.ObjectStats 167 buf := ins.GetVectorByName(catalog.ObjectAttr_ObjectStats).Get(i).([]byte) 168 objectStats.UnMarshal(buf) 169 commitTS := vector.GetFixedAt[types.TS](insCommitTSVec, i) 170 deleteTS := vector.GetFixedAt[types.TS](insDeleteTSVec, i) 171 createTS := vector.GetFixedAt[types.TS](insCreateTSVec, i) 172 object := &ObjectEntry{ 173 commitTS: commitTS, 174 createTS: createTS, 175 dropTS: deleteTS, 176 db: vector.GetFixedAt[uint64](dbid, i), 177 table: vector.GetFixedAt[uint64](tid, i), 178 } 179 t.addObject(objectStats.ObjectName().String(), object, commitTS) 180 } 181 } 182 183 func (t *GCTable) UpdateTableForSnapshot(data *logtail.CheckpointData, num int) { 184 ins := data.GetObjectBatchs() 185 insCommitTSVec := ins.GetVectorByName(txnbase.SnapshotAttr_CommitTS).GetDownstreamVector() 186 insDeleteTSVec := ins.GetVectorByName(catalog.EntryNode_DeleteAt).GetDownstreamVector() 187 insCreateTSVec := ins.GetVectorByName(catalog.EntryNode_CreateAt).GetDownstreamVector() 188 dbid := ins.GetVectorByName(catalog.SnapshotAttr_DBID).GetDownstreamVector() 189 tid := ins.GetVectorByName(catalog.SnapshotAttr_TID).GetDownstreamVector() 190 191 for i := 0; i < ins.Length(); i++ { 192 var objectStats objectio.ObjectStats 193 buf := ins.GetVectorByName(catalog.ObjectAttr_ObjectStats).Get(i).([]byte) 194 objectStats.UnMarshal(buf) 195 commitTS := vector.GetFixedAt[types.TS](insCommitTSVec, i) 196 deleteTS := vector.GetFixedAt[types.TS](insDeleteTSVec, i) 197 createTS := vector.GetFixedAt[types.TS](insCreateTSVec, i) 198 object := &ObjectEntry{ 199 commitTS: commitTS, 200 createTS: createTS, 201 dropTS: deleteTS, 202 db: vector.GetFixedAt[uint64](dbid, i), 203 table: vector.GetFixedAt[uint64](tid, i), 204 } 205 t.addObjectForSnapshot(objectStats.ObjectName().String(), object, commitTS, num, uint32(i)) 206 207 } 208 } 209 210 func (t *GCTable) makeBatchWithGCTable() []*containers.Batch { 211 bats := make([]*containers.Batch, 1) 212 bats[CreateBlock] = containers.NewBatch() 213 return bats 214 } 215 216 func (t *GCTable) makeBatchWithGCTableV1() []*containers.Batch { 217 bats := make([]*containers.Batch, 2) 218 bats[CreateBlock] = containers.NewBatch() 219 bats[DeleteBlock] = containers.NewBatch() 220 return bats 221 } 222 223 func (t *GCTable) closeBatch(bs []*containers.Batch) { 224 for i := range bs { 225 bs[i].Close() 226 } 227 } 228 229 // collectData collects data from memory that can be written to s3 230 func (t *GCTable) collectData(files []string) []*containers.Batch { 231 bats := t.makeBatchWithGCTable() 232 for i, attr := range BlockSchemaAttr { 233 bats[CreateBlock].AddVector(attr, containers.MakeVector(BlockSchemaTypes[i], common.DefaultAllocator)) 234 } 235 for name, entry := range t.objects { 236 bats[CreateBlock].GetVectorByName(GCAttrObjectName).Append([]byte(name), false) 237 bats[CreateBlock].GetVectorByName(GCCreateTS).Append(entry.createTS, false) 238 bats[CreateBlock].GetVectorByName(GCDeleteTS).Append(entry.dropTS, false) 239 bats[CreateBlock].GetVectorByName(GCAttrCommitTS).Append(entry.commitTS, false) 240 bats[CreateBlock].GetVectorByName(GCAttrTableId).Append(entry.table, false) 241 } 242 return bats 243 } 244 245 // SaveTable is to write data to s3 246 func (t *GCTable) SaveTable(start, end types.TS, fs *objectio.ObjectFS, files []string) ([]objectio.BlockObject, error) { 247 bats := t.collectData(files) 248 defer t.closeBatch(bats) 249 name := blockio.EncodeCheckpointMetadataFileName(GCMetaDir, PrefixGCMeta, start, end) 250 writer, err := objectio.NewObjectWriterSpecial(objectio.WriterGC, name, fs.Service) 251 if err != nil { 252 return nil, err 253 } 254 for i := range bats { 255 if _, err := writer.WriteWithoutSeqnum(containers.ToCNBatch(bats[i])); err != nil { 256 return nil, err 257 } 258 } 259 260 blocks, err := writer.WriteEnd(context.Background()) 261 return blocks, err 262 } 263 264 // SaveFullTable is to write data to s3 265 func (t *GCTable) SaveFullTable(start, end types.TS, fs *objectio.ObjectFS, files []string) ([]objectio.BlockObject, error) { 266 bats := t.collectData(files) 267 defer t.closeBatch(bats) 268 name := blockio.EncodeGCMetadataFileName(GCMetaDir, PrefixGCMeta, start, end) 269 writer, err := objectio.NewObjectWriterSpecial(objectio.WriterGC, name, fs.Service) 270 if err != nil { 271 return nil, err 272 } 273 for i := range bats { 274 if _, err := writer.WriteWithoutSeqnum(containers.ToCNBatch(bats[i])); err != nil { 275 return nil, err 276 } 277 } 278 279 blocks, err := writer.WriteEnd(context.Background()) 280 return blocks, err 281 } 282 283 func (t *GCTable) rebuildTableV2(bats []*containers.Batch) { 284 for i := 0; i < bats[CreateBlock].Length(); i++ { 285 name := string(bats[CreateBlock].GetVectorByName(GCAttrObjectName).Get(i).([]byte)) 286 creatTS := bats[CreateBlock].GetVectorByName(GCCreateTS).Get(i).(types.TS) 287 deleteTS := bats[CreateBlock].GetVectorByName(GCDeleteTS).Get(i).(types.TS) 288 commitTS := bats[CreateBlock].GetVectorByName(GCAttrCommitTS).Get(i).(types.TS) 289 tid := bats[CreateBlock].GetVectorByName(GCAttrTableId).Get(i).(uint64) 290 if t.objects[name] != nil { 291 continue 292 } 293 object := &ObjectEntry{ 294 createTS: creatTS, 295 dropTS: deleteTS, 296 commitTS: commitTS, 297 table: tid, 298 } 299 t.addObject(name, object, commitTS) 300 } 301 } 302 303 func (t *GCTable) rebuildTable(bats []*containers.Batch, ts types.TS) { 304 for i := 0; i < bats[CreateBlock].Length(); i++ { 305 name := string(bats[CreateBlock].GetVectorByName(GCAttrObjectName).Get(i).([]byte)) 306 if t.objects[name] != nil { 307 continue 308 } 309 object := &ObjectEntry{ 310 createTS: ts, 311 commitTS: ts, 312 } 313 t.addObject(name, object, ts) 314 } 315 for i := 0; i < bats[DeleteBlock].Length(); i++ { 316 name := string(bats[DeleteBlock].GetVectorByName(GCAttrObjectName).Get(i).([]byte)) 317 if t.objects[name] == nil { 318 logutil.Fatalf("delete object should not be nil") 319 } 320 object := &ObjectEntry{ 321 dropTS: ts, 322 commitTS: ts, 323 } 324 t.addObject(name, object, ts) 325 } 326 } 327 328 func (t *GCTable) replayData(ctx context.Context, 329 typ BatchType, 330 attrs []string, 331 types []types.Type, 332 bats []*containers.Batch, 333 bs []objectio.BlockObject, 334 reader *blockio.BlockReader) (func(), error) { 335 idxes := make([]uint16, len(attrs)) 336 for i := range attrs { 337 idxes[i] = uint16(i) 338 } 339 mobat, release, err := reader.LoadColumns(ctx, idxes, nil, bs[typ].GetID(), common.DefaultAllocator) 340 if err != nil { 341 return nil, err 342 } 343 for i := range attrs { 344 pkgVec := mobat.Vecs[i] 345 var vec containers.Vector 346 if pkgVec.Length() == 0 { 347 vec = containers.MakeVector(types[i], common.DefaultAllocator) 348 } else { 349 vec = containers.ToTNVector(pkgVec, common.DefaultAllocator) 350 } 351 bats[typ].AddVector(attrs[i], vec) 352 } 353 return release, nil 354 } 355 356 // ReadTable reads an s3 file and replays a GCTable in memory 357 func (t *GCTable) ReadTable(ctx context.Context, name string, size int64, fs *objectio.ObjectFS, ts types.TS) error { 358 var release, releaseCreateBlock, releaseDeleteBlock func() 359 defer func() { 360 if release != nil { 361 release() 362 } 363 if releaseCreateBlock != nil { 364 releaseCreateBlock() 365 } 366 if releaseDeleteBlock != nil { 367 releaseDeleteBlock() 368 } 369 }() 370 reader, err := blockio.NewFileReaderNoCache(fs.Service, name) 371 if err != nil { 372 return err 373 } 374 bs, err := reader.LoadAllBlocks(ctx, common.DefaultAllocator) 375 if err != nil { 376 return err 377 } 378 if len(bs) == 1 { 379 bats := t.makeBatchWithGCTable() 380 defer t.closeBatch(bats) 381 release, err = t.replayData(ctx, CreateBlock, BlockSchemaAttr, BlockSchemaTypes, bats, bs, reader) 382 if err != nil { 383 return err 384 } 385 t.rebuildTableV2(bats) 386 return nil 387 } 388 bats := t.makeBatchWithGCTableV1() 389 defer t.closeBatch(bats) 390 releaseCreateBlock, err = t.replayData(ctx, CreateBlock, BlockSchemaAttrV1, BlockSchemaTypesV1, bats, bs, reader) 391 if err != nil { 392 return err 393 } 394 releaseDeleteBlock, err = t.replayData(ctx, DeleteBlock, BlockSchemaAttrV1, BlockSchemaTypesV1, bats, bs, reader) 395 if err != nil { 396 return err 397 } 398 t.rebuildTable(bats, ts) 399 return nil 400 } 401 402 // For test 403 func (t *GCTable) Compare(table *GCTable) bool { 404 for name, entry := range table.objects { 405 object := t.objects[name] 406 if object == nil { 407 logutil.Infof("object %s is nil, create %v, drop %v", name, entry.createTS.ToString(), entry.dropTS.ToString()) 408 return false 409 } 410 if !entry.commitTS.Equal(&object.commitTS) { 411 logutil.Infof("object %s commitTS is not equal", name) 412 return false 413 } 414 } 415 416 return len(t.objects) == len(table.objects) 417 } 418 419 func (t *GCTable) String() string { 420 if len(t.objects) == 0 { 421 return "" 422 } 423 var w bytes.Buffer 424 _, _ = w.WriteString("objects:[\n") 425 for name, entry := range t.objects { 426 _, _ = w.WriteString(fmt.Sprintf("name: %s, commitTS: %v ", name, entry.commitTS.ToString())) 427 } 428 _, _ = w.WriteString("]\n") 429 return w.String() 430 }