// github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/disttae/db.go

// Copyright 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package disttae

import (
	"context"
	"strconv"
	"strings"
	"sync"

	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/disttae/cache"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/disttae/logtailreplay"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/db/checkpoint"
	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/logtail"

	"github.com/matrixorigin/matrixone/pkg/catalog"
	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
	"github.com/matrixorigin/matrixone/pkg/pb/timestamp"
)

// init is used to insert some data that will not be synchronized by logtail.
func (e *Engine) init(ctx context.Context) error {
	e.Lock()
	defer e.Unlock()
	m := e.mp

	e.catalog = cache.NewCatalog()
	e.partitions = make(map[[2]uint64]*logtailreplay.Partition)

	var packer *types.Packer
	put := e.packerPool.Get(&packer)
	defer put.Put()

	{
		e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_DATABASE_ID}] = logtailreplay.NewPartition()
	}

	{
		e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID}] = logtailreplay.NewPartition()
	}

	{
		e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID}] = logtailreplay.NewPartition()
	}

	{ // mo_catalog
		part := e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_DATABASE_ID}]
		bat, err := genCreateDatabaseTuple("", 0, 0, 0, catalog.MO_CATALOG, catalog.MO_CATALOG_ID, "", m)
		if err != nil {
			return err
		}
		ibat, err := genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done := part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF, packer)
		done()
		e.catalog.InsertDatabase(bat)
		bat.Clean(m)
	}

	{ // mo_database
		part := e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID}]
		cols, err := genColumns(0, catalog.MO_DATABASE, catalog.MO_CATALOG, catalog.MO_DATABASE_ID,
			catalog.MO_CATALOG_ID, catalog.MoDatabaseTableDefs)
		if err != nil {
			return err
		}
		tbl := new(txnTable)
		tbl.relKind = catalog.SystemOrdinaryRel
		bat, err := genCreateTableTuple(tbl, "", 0, 0, 0,
			catalog.MO_DATABASE, catalog.MO_DATABASE_ID,
			catalog.MO_CATALOG_ID, catalog.MO_CATALOG, types.Rowid{}, false, m)
		if err != nil {
			return err
		}
		ibat, err := genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done := part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_TABLES_REL_ID_IDX, packer)
		done()
		e.catalog.InsertTable(bat)
		bat.Clean(m)

		part = e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID}]
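		// The three bootstrap blocks accumulate mo_columns rows the same way:
		// genCreateColumnTuple yields a one-row batch per column, the target
		// vectors are allocated lazily from the first tuple's types, and
		// UnionOne appends row 0 of each per-column vector into the batch.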
		bat = batch.NewWithSize(len(catalog.MoColumnsSchema))
		bat.Attrs = append(bat.Attrs, catalog.MoColumnsSchema...)
		bat.SetRowCount(len(cols))
		for _, col := range cols {
			bat0, err := genCreateColumnTuple(col, types.Rowid{}, false, m)
			if err != nil {
				return err
			}
			if bat.Vecs[0] == nil {
				for i, vec := range bat0.Vecs {
					bat.Vecs[i] = vector.NewVec(*vec.GetType())
				}
			}
			for i, vec := range bat0.Vecs {
				if err := bat.Vecs[i].UnionOne(vec, 0, m); err != nil {
					bat.Clean(m)
					bat0.Clean(m)
					return err
				}
			}
			bat0.Clean(m)
		}
		ibat, err = genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done = part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_COLUMNS_ATT_UNIQ_NAME_IDX, packer)
		done()
		e.catalog.InsertColumns(bat)
		bat.Clean(m)
	}

	{ // mo_tables
		part := e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID}]
		cols, err := genColumns(0, catalog.MO_TABLES, catalog.MO_CATALOG, catalog.MO_TABLES_ID,
			catalog.MO_CATALOG_ID, catalog.MoTablesTableDefs)
		if err != nil {
			return err
		}
		tbl := new(txnTable)
		tbl.relKind = catalog.SystemOrdinaryRel
		bat, err := genCreateTableTuple(tbl, "", 0, 0, 0, catalog.MO_TABLES, catalog.MO_TABLES_ID,
			catalog.MO_CATALOG_ID, catalog.MO_CATALOG, types.Rowid{}, false, m)
		if err != nil {
			return err
		}
		ibat, err := genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done := part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_TABLES_REL_ID_IDX, packer)
		done()
		e.catalog.InsertTable(bat)
		bat.Clean(m)

		part = e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID}]
		bat = batch.NewWithSize(len(catalog.MoColumnsSchema))
		bat.Attrs = append(bat.Attrs, catalog.MoColumnsSchema...)
		bat.SetRowCount(len(cols))
		for _, col := range cols {
			bat0, err := genCreateColumnTuple(col, types.Rowid{}, false, m)
			if err != nil {
				return err
			}
			if bat.Vecs[0] == nil {
				for i, vec := range bat0.Vecs {
					bat.Vecs[i] = vector.NewVec(*vec.GetType())
				}
			}
			for i, vec := range bat0.Vecs {
				if err := bat.Vecs[i].UnionOne(vec, 0, m); err != nil {
					bat.Clean(m)
					bat0.Clean(m)
					return err
				}
			}
			bat0.Clean(m)
		}
		ibat, err = genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done = part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_COLUMNS_ATT_UNIQ_NAME_IDX, packer)
		done()
		e.catalog.InsertColumns(bat)
		bat.Clean(m)
	}

	{ // mo_columns
		part := e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_TABLES_ID}]
		cols, err := genColumns(0, catalog.MO_COLUMNS, catalog.MO_CATALOG, catalog.MO_COLUMNS_ID,
			catalog.MO_CATALOG_ID, catalog.MoColumnsTableDefs)
		if err != nil {
			return err
		}
		tbl := new(txnTable)
		tbl.relKind = catalog.SystemOrdinaryRel
		bat, err := genCreateTableTuple(tbl, "", 0, 0, 0, catalog.MO_COLUMNS, catalog.MO_COLUMNS_ID,
			catalog.MO_CATALOG_ID, catalog.MO_CATALOG, types.Rowid{}, false, m)
		if err != nil {
			return err
		}
		ibat, err := genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done := part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_TABLES_REL_ID_IDX, packer)
		done()
		e.catalog.InsertTable(bat)
		bat.Clean(m)

		part = e.partitions[[2]uint64{catalog.MO_CATALOG_ID, catalog.MO_COLUMNS_ID}]
		bat = batch.NewWithSize(len(catalog.MoColumnsSchema))
		bat.Attrs = append(bat.Attrs, catalog.MoColumnsSchema...)
		bat.SetRowCount(len(cols))
		for _, col := range cols {
			bat0, err := genCreateColumnTuple(col, types.Rowid{}, false, m)
			if err != nil {
				return err
			}
			if bat.Vecs[0] == nil {
				for i, vec := range bat0.Vecs {
					bat.Vecs[i] = vector.NewVec(*vec.GetType())
				}
			}
			for i, vec := range bat0.Vecs {
				if err := bat.Vecs[i].UnionOne(vec, 0, m); err != nil {
					bat.Clean(m)
					bat0.Clean(m)
					return err
				}
			}
			bat0.Clean(m)
		}
		ibat, err = genInsertBatch(bat, m)
		if err != nil {
			bat.Clean(m)
			return err
		}
		state, done = part.MutateState()
		state.HandleRowsInsert(ctx, ibat, MO_PRIMARY_OFF+catalog.MO_COLUMNS_ATT_UNIQ_NAME_IDX, packer)
		done()
		e.catalog.InsertColumns(bat)
		bat.Clean(m)
	}

	return nil
}
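// getLatestCatalogCache returns the catalog cache that tracks the latest
// state; reads at an older snapshot timestamp go through
// getOrCreateSnapCatalogCache instead.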
func (e *Engine) getLatestCatalogCache() *cache.CatalogCache {
	return e.catalog
}

func (e *Engine) loadSnapCkpForTable(
	ctx context.Context,
	snapCatalog *cache.CatalogCache,
	loc string,
	tid uint64,
	tblName string,
	did uint64,
	dbName string,
	pkSeqNum int,
) error {
	entries, closeCBs, err := logtail.LoadCheckpointEntries(
		ctx,
		loc,
		tid,
		tblName,
		did,
		dbName,
		e.mp,
		e.fs)
	if err != nil {
		return err
	}
	defer func() {
		for _, cb := range closeCBs {
			cb()
		}
	}()
	for _, entry := range entries {
		if err = consumeEntry(ctx, pkSeqNum, e, snapCatalog, nil, entry); err != nil {
			return err
		}
	}
	return nil
}
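// getOrCreateSnapCatalogCache serves catalog reads at snapshot timestamp ts.
// The latest cache, or a previously built snapshot cache, is reused when it
// can serve ts; otherwise the system-table checkpoints covering ts are
// replayed into a fresh cache. The location string handed to
// loadSnapCkpForTable is "<checkpoint location>;<version>", assembled below
// with strings.Join.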
func (e *Engine) getOrCreateSnapCatalogCache(
	ctx context.Context,
	ts types.TS) (*cache.CatalogCache, error) {
	if e.catalog.CanServe(ts) {
		return e.catalog, nil
	}
	e.snapCatalog.Lock()
	defer e.snapCatalog.Unlock()
	for _, snap := range e.snapCatalog.snaps {
		if snap.CanServe(ts) {
			return snap, nil
		}
	}
	snapCata := cache.NewCatalog()
	// TODO: insert mo_tables, mo_columns, mo_database, and mo_catalog into
	// snapCata; refer to Engine.init.
	ckps, err := checkpoint.ListSnapshotCheckpoint(ctx, e.fs, ts, 0, nil)
	if ckps == nil {
		return nil, moerr.NewInternalErrorNoCtx("No checkpoints for snapshot read")
	}
	if err != nil {
		return nil, err
	}
	// Note that the checkpoints must contain zero or one global checkpoint,
	// followed by zero or more contiguous incremental checkpoints.
	start := types.MaxTs()
	end := types.TS{}
	for _, ckp := range ckps {
		locs := make([]string, 0)
		locs = append(locs, ckp.GetLocation().String())
		locs = append(locs, strconv.Itoa(int(ckp.GetVersion())))
		locations := strings.Join(locs, ";")
		// FIXME: pkSeqNum == 0?
		if err := e.loadSnapCkpForTable(
			ctx,
			snapCata,
			locations,
			catalog.MO_DATABASE_ID,
			catalog.MO_DATABASE,
			catalog.MO_CATALOG_ID,
			catalog.MO_CATALOG,
			0); err != nil {
			return nil, err
		}
		if err := e.loadSnapCkpForTable(
			ctx,
			snapCata,
			locations,
			catalog.MO_TABLES_ID,
			catalog.MO_TABLES,
			catalog.MO_CATALOG_ID,
			catalog.MO_CATALOG, 0); err != nil {
			return nil, err
		}
		if err := e.loadSnapCkpForTable(
			ctx,
			snapCata,
			locations,
			catalog.MO_COLUMNS_ID,
			catalog.MO_COLUMNS,
			catalog.MO_CATALOG_ID,
			catalog.MO_CATALOG,
			0); err != nil {
			return nil, err
		}
		// Update start and end of snapCata.
		if ckp.GetType() == checkpoint.ET_Global {
			start = ckp.GetEnd()
		}
		if ckp.GetType() == checkpoint.ET_Incremental {
			ckpstart := ckp.GetStart()
			if ckpstart.Less(&start) {
				start = ckpstart
			}
			ckpend := ckp.GetEnd()
			if ckpend.Greater(&end) {
				end = ckpend
			}
		}
	}
	if end.IsEmpty() {
		// Only one global checkpoint.
		end = start
	}
	if ts.Greater(&end) || ts.Less(&start) {
		return nil, moerr.NewInternalErrorNoCtx("Invalid checkpoints for snapshot read")
	}
	snapCata.UpdateDuration(start, end)
	e.snapCatalog.snaps = append(e.snapCatalog.snaps, snapCata)
	return snapCata, nil
}
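// getOrCreateSnapPart returns a partition able to serve a read at snapshot
// timestamp ts. A cached snapshot partition is reused when it can serve ts;
// otherwise the table's snapshot checkpoints are replayed into a new
// partition. If there are no checkpoints, or ts is newer than the newest
// checkpoint, the latest partition is used instead; if ts predates the
// oldest checkpoint, the snapshot is too old to be served.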
func (e *Engine) getOrCreateSnapPart(
	ctx context.Context,
	tbl *txnTable,
	ts types.TS) (*logtailreplay.Partition, error) {
	// Check whether a snapshot partition is available for reuse.
	e.mu.Lock()
	tblSnaps, ok := e.mu.snapParts[[2]uint64{tbl.db.databaseId, tbl.tableId}]
	if !ok {
		e.mu.snapParts[[2]uint64{tbl.db.databaseId, tbl.tableId}] = &struct {
			sync.Mutex
			snaps []*logtailreplay.Partition
		}{}
		tblSnaps = e.mu.snapParts[[2]uint64{tbl.db.databaseId, tbl.tableId}]
	}
	e.mu.Unlock()

	tblSnaps.Lock()
	defer tblSnaps.Unlock()
	for _, snap := range tblSnaps.snaps {
		if snap.CanServe(ts) {
			return snap, nil
		}
	}

	// Create a new snapshot partition and apply checkpoints to it.
	snap := logtailreplay.NewPartition()
	// TODO: if tableId is mo_tables, mo_columns, or mo_database,
	// we should init the partition; refer to Engine.init.
	ckps, err := checkpoint.ListSnapshotCheckpoint(ctx, e.fs, ts, tbl.tableId, nil)
	if err != nil {
		return nil, err
	}
	snap.ConsumeSnapCkps(ctx, ckps, func(
		checkpoint *checkpoint.CheckpointEntry,
		state *logtailreplay.PartitionState) error {
		locs := make([]string, 0)
		locs = append(locs, checkpoint.GetLocation().String())
		locs = append(locs, strconv.Itoa(int(checkpoint.GetVersion())))
		locations := strings.Join(locs, ";")
		entries, closeCBs, err := logtail.LoadCheckpointEntries(
			ctx,
			locations,
			tbl.tableId,
			tbl.tableName,
			tbl.db.databaseId,
			tbl.db.databaseName,
			e.mp,
			e.fs)
		if err != nil {
			return err
		}
		defer func() {
			for _, cb := range closeCBs {
				cb()
			}
		}()
		for _, entry := range entries {
			if err = consumeEntry(
				ctx,
				tbl.primarySeqnum,
				e,
				nil,
				state,
				entry); err != nil {
				return err
			}
		}
		return nil
	})
	if snap.CanServe(ts) {
		tblSnaps.snaps = append(tblSnaps.snaps, snap)
		return snap, nil
	}

	start, end := snap.GetDuration()
	// If there are no checkpoints or ts > snap.end, use the latest partition.
	if snap.IsEmpty() || ts.Greater(&end) {
		err := tbl.updateLogtail(ctx)
		if err != nil {
			return nil, err
		}
		return e.getOrCreateLatestPart(tbl.db.databaseId, tbl.tableId), nil
	}
	if ts.Less(&start) {
		return nil, moerr.NewInternalErrorNoCtx(
			"No valid checkpoints for snapshot read, maybe the snapshot is too old, "+
				"snapshot:%s, start:%s, end:%s",
			ts.ToTimestamp().DebugString(),
			start.ToTimestamp().DebugString(),
			end.ToTimestamp().DebugString())
	}
	panic("impossible path")
}
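// getOrCreateLatestPart returns the partition that tracks the latest state
// of the given table, creating an empty one under the engine lock on first
// access.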
func (e *Engine) getOrCreateLatestPart(
	databaseId,
	tableId uint64) *logtailreplay.Partition {
	e.Lock()
	defer e.Unlock()
	partition, ok := e.partitions[[2]uint64{databaseId, tableId}]
	if !ok { // create a new table
		partition = logtailreplay.NewPartition()
		e.partitions[[2]uint64{databaseId, tableId}] = partition
	}
	return partition
}

// lazyLoadLatestCkp consumes the table's checkpoints into its latest
// partition and the latest catalog cache, and returns the partition.
func (e *Engine) lazyLoadLatestCkp(
	ctx context.Context,
	tbl *txnTable) (*logtailreplay.Partition, error) {
	part := e.getOrCreateLatestPart(tbl.db.databaseId, tbl.tableId)
	cache := e.getLatestCatalogCache()

	if err := part.ConsumeCheckpoints(
		ctx,
		func(checkpoint string, state *logtailreplay.PartitionState) error {
			entries, closeCBs, err := logtail.LoadCheckpointEntries(
				ctx,
				checkpoint,
				tbl.tableId,
				tbl.tableName,
				tbl.db.databaseId,
				tbl.db.databaseName,
				tbl.getTxn().engine.mp,
				tbl.getTxn().engine.fs)
			if err != nil {
				return err
			}
			defer func() {
				for _, cb := range closeCBs {
					cb()
				}
			}()
			for _, entry := range entries {
				if err = consumeEntry(ctx, tbl.primarySeqnum, e, cache, state, entry); err != nil {
					return err
				}
			}
			return nil
		},
	); err != nil {
		return nil, err
	}

	return part, nil
}

// UpdateOfPush subscribes the table through the push client; the passed
// timestamp is currently unused.
func (e *Engine) UpdateOfPush(
	ctx context.Context,
	databaseId,
	tableId uint64, ts timestamp.Timestamp) error {
	return e.pClient.TryToSubscribeTable(ctx, databaseId, tableId)
}
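// Usage sketch (hedged; the caller below is hypothetical, not part of this
// file): a read at snapshot timestamp ts resolves metadata and table state
// through the snapshot paths,
//
//	cata, err := e.getOrCreateSnapCatalogCache(ctx, ts)
//	part, err := e.getOrCreateSnapPart(ctx, tbl, ts)
//
// while a read at the latest timestamp uses getLatestCatalogCache and
// lazyLoadLatestCkp, with UpdateOfPush subscribing the table to the push
// client so later changes arrive via logtail.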