github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/model/aot.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  // http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package model
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"sync"
    22  
    23  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    24  	"github.com/matrixorigin/matrixone/pkg/container/types"
    25  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    26  	"github.com/tidwall/btree"
    27  )
    28  
// RowsT represents a group of rows.
// T is the type produced by Window; BlockT and AOT constrain it to the
// implementing type itself (R RowsT[R]).
type RowsT[T any] interface {
	// Length returns the row count of the group.
	Length() int

	// Window returns a window of the group of rows, described by a start
	// offset and a length.
	Window(offset, length int) T
}
    37  
// BlockT represents a block of rows.
type BlockT[R RowsT[R]] interface {
	// Append appends a group of rows into the block.
	Append(R) error

	// IsAppendable specifies whether the block is appendable.
	IsAppendable() bool

	// Length specifies the row count of the block.
	Length() int

	// String returns a textual representation of the block.
	String() string

	// Close releases the block bound resources.
	// It should be called when the block is not used.
	Close()
}
    55  
// AOTSnapshot represents the snapshot of an AOT.
// AOT.Snapshot returns a copy of the table's block tree as this interface.
// NOTE(review): iter presumably returns false to stop the iteration early,
// as the Scan callbacks in this file do -- confirm against the tree
// implementation.
type AOTSnapshot[B BlockT[R], R RowsT[R]] interface {
	// Ascend the table within the range [pivot, last]
	Ascend(pivot B, iter func(blk B) bool)

	// Descend the table within the range [pivot, first]
	Descend(pivot B, iter func(blk B) bool)
}
    64  
// AOT stands for append-only-table.
// Append-only is the most common form of data organization.
// A basic data structure is abstracted here, which can cover
// most scenarios, such as logtail data and checkpoint data.
type AOT[B BlockT[R], R RowsT[R]] struct {
	// Held around every access to blocks and while a new appender is
	// installed. Append and prepareAppend read appender without it.
	sync.Mutex
	// blockSize is the row count at which the appender is treated as full.
	blockSize    int
	// appender is the block currently receiving appended rows; it is the
	// zero value of B until the first block is created.
	appender     B
	// blocks holds all blocks, ordered by the less function given to NewAOT.
	blocks       *btree.BTreeG[B]
	// blockFactory creates a new block; it is called with the rows being
	// appended whenever the appender cannot take any more rows.
	blockFactory func(R) B
}
    76  
    77  func NewAOT[B BlockT[R], R RowsT[R]](
    78  	blockSize int,
    79  	blockFactory func(R) B,
    80  	lessFn func(_, _ B) bool) *AOT[B, R] {
    81  	return &AOT[B, R]{
    82  		blockSize:    blockSize,
    83  		blockFactory: blockFactory,
    84  		blocks:       btree.NewBTreeGOptions(lessFn, btree.Options{NoLocks: true}),
    85  	}
    86  }
    87  
    88  func (aot *AOT[B, R]) Scan(fn func(_ B) bool) {
    89  	aot.Lock()
    90  	cpy := aot.blocks.Copy()
    91  	aot.Unlock()
    92  	cpy.Scan(fn)
    93  }
    94  
    95  func (aot *AOT[B, R]) Snapshot() AOTSnapshot[B, R] {
    96  	aot.Lock()
    97  	defer aot.Unlock()
    98  	return aot.blocks.Copy()
    99  }
   100  
   101  func (aot *AOT[B, R]) Close() {
   102  	aot.Lock()
   103  	defer aot.Unlock()
   104  	aot.blocks.Scan(func(block B) bool {
   105  		block.Close()
   106  		return true
   107  	})
   108  	aot.blocks.Clear()
   109  }
   110  
   111  func (aot *AOT[B, R]) String() string {
   112  	aot.Lock()
   113  	cpy := aot.blocks.Copy()
   114  	aot.Unlock()
   115  	var w bytes.Buffer
   116  	_, _ = w.WriteString(fmt.Sprintf("AOT[Len=%d]", cpy.Len()))
   117  	cpy.Scan(func(block B) bool {
   118  		_ = w.WriteByte('\n')
   119  		_, _ = w.WriteString(block.String())
   120  		return true
   121  	})
   122  
   123  	return w.String()
   124  }
   125  
   126  func (aot *AOT[B, R]) BlockCount() int {
   127  	aot.Lock()
   128  	defer aot.Unlock()
   129  	return aot.blocks.Len()
   130  }
   131  
   132  func (aot *AOT[B, R]) Min() (b B) {
   133  	aot.Lock()
   134  	cpy := aot.blocks.Copy()
   135  	aot.Unlock()
   136  	b, _ = cpy.Min()
   137  	return
   138  }
   139  
   140  func (aot *AOT[B, R]) Max() (b B) {
   141  	aot.Lock()
   142  	cpy := aot.blocks.Copy()
   143  	aot.Unlock()
   144  	b, _ = cpy.Max()
   145  	return
   146  }
   147  
   148  // Truncate prunes the blocks.
   149  // Deletable blocks are those have all txns prepared before the given timestamp
   150  // For example: truncate the table by timestamp
   151  // blocks:           (Page1[bornTs=1], Page2[bornTs=10], Page3[bornTs=20])
   152  // Call              Remain               Delete
   153  // Truncate(ts=5):   (Page1,Page2,Page3), ()
   154  // Truncate(ts=12):  (Page2,Page3),       (Page1)
   155  // Truncate(ts=30):  (Page3),             (Page1,Page2)
   156  func (aot *AOT[B, R]) Truncate(stopFn func(_ B) bool) (cnt int) {
   157  	aot.Lock()
   158  	cpy := aot.blocks.Copy()
   159  	aot.Unlock()
   160  
   161  	valid := false // if there is a block stopping search early
   162  	candidates := make([]B, 0)
   163  	cpy.Scan(func(block B) bool {
   164  		if stopFn(block) {
   165  			// this block's bornTS >= given ts
   166  			valid = true
   167  			return false
   168  		}
   169  		candidates = append(candidates, block)
   170  		// logutil.Infof("candidate %s", block.String())
   171  		return true
   172  	})
   173  
   174  	// logutil.Infof("valid=%v, candidates len=%d", valid, len(candidates))
   175  
   176  	// 1. clear them all? probably not a good idea, wrong checkpoint?
   177  	// 2. just delete one block? seems not neccessary
   178  	if !valid || len(candidates) <= 1 {
   179  		return
   180  	}
   181  	candidates = candidates[:len(candidates)-1]
   182  
   183  	aot.Lock()
   184  	defer aot.Unlock()
   185  
   186  	cnt = len(candidates)
   187  	for _, block := range candidates {
   188  		aot.blocks.Delete(block)
   189  	}
   190  
   191  	return
   192  }
   193  
   194  func (aot *AOT[B, R]) prepareAppend(rows int) (cnt int, all bool) {
   195  	if !aot.appender.IsAppendable() {
   196  		return
   197  	}
   198  	left := aot.blockSize - aot.appender.Length()
   199  	if rows > left {
   200  		cnt = left
   201  	} else {
   202  		cnt = rows
   203  		all = true
   204  	}
   205  	return
   206  }
   207  
   208  // One appender
   209  func (aot *AOT[B, R]) Append(rows R) (err error) {
   210  	var (
   211  		done     bool
   212  		appended int
   213  		toAppend int
   214  	)
   215  	for !done {
   216  		toAppend, done = aot.prepareAppend(rows.Length() - appended)
   217  		if toAppend == 0 {
   218  			newB := aot.blockFactory(rows)
   219  			if err = aot.appendBlock(newB); err != nil {
   220  				return
   221  			}
   222  			continue
   223  		}
   224  		if toAppend == rows.Length() {
   225  			if err = aot.appender.Append(rows); err != nil {
   226  				return
   227  			}
   228  		} else {
   229  			if err = aot.appender.Append(rows.Window(appended, toAppend)); err != nil {
   230  				return
   231  			}
   232  		}
   233  		// logutil.Infof("Appended=%d, ToAppend=%d, done=%v, AllRows=%d", appended, toAppend, done, rows.Length())
   234  		appended += toAppend
   235  	}
   236  	return
   237  }
   238  
   239  func (aot *AOT[B, R]) appendBlock(block B) (err error) {
   240  	aot.Lock()
   241  	defer aot.Unlock()
   242  	if aot.appender.IsAppendable() && aot.appender.Length() < aot.blockSize {
   243  		panic(moerr.NewInternalError(
   244  			context.Background(),
   245  			"append a block but the previous block is appendable"))
   246  	}
   247  	aot.blocks.Set(block)
   248  	aot.appender = block
   249  	return
   250  }
   251  
// TimedSliceBlock is a block that keeps its content in a plain slice and
// carries a timestamp.
type TimedSliceBlock[R any] struct {
	// BornTS is the timestamp given to NewTimedSliceBlock; Close resets it
	// to the zero value.
	BornTS types.TS
	// Rows holds the appended values; every Append call adds one element.
	Rows   []R
}
   256  
   257  func NewTimedSliceBlock[R any](ts types.TS) *TimedSliceBlock[R] {
   258  	return &TimedSliceBlock[R]{
   259  		BornTS: ts,
   260  		Rows:   make([]R, 0),
   261  	}
   262  }
   263  
   264  func (blk *TimedSliceBlock[R]) Append(rows R) (err error) {
   265  	blk.Rows = append(blk.Rows, rows)
   266  	return
   267  }
   268  
// IsAppendable reports whether the block can be appended to, which is the
// case for every non-nil block. It is safe to call on a nil receiver.
func (blk *TimedSliceBlock[R]) IsAppendable() bool {
	return blk != nil
}
   272  
// Length returns the number of elements in Rows, i.e. the number of
// values appended so far. The receiver must not be nil.
func (blk *TimedSliceBlock[R]) Length() int {
	return len(blk.Rows)
}
   276  
// String is a placeholder: it currently returns the literal "TODO" and
// does not describe the block's content.
func (blk *TimedSliceBlock[R]) String() string {
	return "TODO"
}
   280  
   281  func (blk *TimedSliceBlock[R]) Close() {
   282  	blk.BornTS = types.TS{}
   283  	blk.Rows = make([]R, 0)
   284  }
   285  
// BatchBlock is a block backed by a containers.Batch and tagged with an ID.
type BatchBlock struct {
	// Batch is embedded, so its methods are promoted to BatchBlock.
	*containers.Batch
	// ID is the identifier given to NewBatchBlock.
	ID uint64
}
   290  
   291  func NewBatchBlock(
   292  	id uint64,
   293  	attrs []string,
   294  	colTypes []types.Type,
   295  	nullables []bool,
   296  	opts containers.Options) *BatchBlock {
   297  	bat := containers.BuildBatch(attrs, colTypes, nullables, opts)
   298  	block := &BatchBlock{
   299  		Batch: bat,
   300  		ID:    id,
   301  	}
   302  	return block
   303  }
   304  
// IsAppendable reports whether the block can be appended to, which is the
// case for every non-nil block. It is safe to call on a nil receiver.
func (blk *BatchBlock) IsAppendable() bool {
	return blk != nil
}