github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/model/aot.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package model 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "sync" 22 23 "github.com/matrixorigin/matrixone/pkg/common/moerr" 24 "github.com/matrixorigin/matrixone/pkg/container/types" 25 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers" 26 "github.com/tidwall/btree" 27 ) 28 29 // RowsT represents a group of rows 30 type RowsT[T any] interface { 31 // row count 32 Length() int 33 34 // returns a window of the group of rows 35 Window(offset, length int) T 36 } 37 38 // BlockT represents a block of rows 39 type BlockT[R RowsT[R]] interface { 40 // Append appends a group of rows into the block 41 Append(R) error 42 43 // IsAppendable specifies wether the block is appendable 44 IsAppendable() bool 45 46 // Length specifies the row count of the block 47 Length() int 48 49 String() string 50 51 // Close release the block bound resources 52 // It should be called when the block is not used 53 Close() 54 } 55 56 // AOTSnapshot represents the snapshot of a AOT 57 type AOTSnapshot[B BlockT[R], R RowsT[R]] interface { 58 // Ascend the table within the range [pivot, last] 59 Ascend(pivot B, iter func(blk B) bool) 60 61 // Descend the table within the range [pivot, first] 62 Descend(pivot B, iter func(blk B) bool) 63 } 64 65 // AOT stands for append-only-table 66 // append-only is the most common form of data organization. 67 // A basic data structure is abstracted here, which can cover 68 // most scenarios, such as logtail data and checkpoint data 69 type AOT[B BlockT[R], R RowsT[R]] struct { 70 sync.Mutex 71 blockSize int 72 appender B 73 blocks *btree.BTreeG[B] 74 blockFactory func(R) B 75 } 76 77 func NewAOT[B BlockT[R], R RowsT[R]]( 78 blockSize int, 79 blockFactory func(R) B, 80 lessFn func(_, _ B) bool) *AOT[B, R] { 81 return &AOT[B, R]{ 82 blockSize: blockSize, 83 blockFactory: blockFactory, 84 blocks: btree.NewBTreeGOptions(lessFn, btree.Options{NoLocks: true}), 85 } 86 } 87 88 func (aot *AOT[B, R]) Scan(fn func(_ B) bool) { 89 aot.Lock() 90 cpy := aot.blocks.Copy() 91 aot.Unlock() 92 cpy.Scan(fn) 93 } 94 95 func (aot *AOT[B, R]) Snapshot() AOTSnapshot[B, R] { 96 aot.Lock() 97 defer aot.Unlock() 98 return aot.blocks.Copy() 99 } 100 101 func (aot *AOT[B, R]) Close() { 102 aot.Lock() 103 defer aot.Unlock() 104 aot.blocks.Scan(func(block B) bool { 105 block.Close() 106 return true 107 }) 108 aot.blocks.Clear() 109 } 110 111 func (aot *AOT[B, R]) String() string { 112 aot.Lock() 113 cpy := aot.blocks.Copy() 114 aot.Unlock() 115 var w bytes.Buffer 116 _, _ = w.WriteString(fmt.Sprintf("AOT[Len=%d]", cpy.Len())) 117 cpy.Scan(func(block B) bool { 118 _ = w.WriteByte('\n') 119 _, _ = w.WriteString(block.String()) 120 return true 121 }) 122 123 return w.String() 124 } 125 126 func (aot *AOT[B, R]) BlockCount() int { 127 aot.Lock() 128 defer aot.Unlock() 129 return aot.blocks.Len() 130 } 131 132 func (aot *AOT[B, R]) Min() (b B) { 133 aot.Lock() 134 cpy := aot.blocks.Copy() 135 aot.Unlock() 136 b, _ = cpy.Min() 137 return 138 } 139 140 func (aot *AOT[B, R]) Max() (b B) { 141 aot.Lock() 142 cpy := aot.blocks.Copy() 143 aot.Unlock() 144 b, _ = cpy.Max() 145 return 146 } 147 148 // Truncate prunes the blocks. 149 // Deletable blocks are those have all txns prepared before the given timestamp 150 // For example: truncate the table by timestamp 151 // blocks: (Page1[bornTs=1], Page2[bornTs=10], Page3[bornTs=20]) 152 // Call Remain Delete 153 // Truncate(ts=5): (Page1,Page2,Page3), () 154 // Truncate(ts=12): (Page2,Page3), (Page1) 155 // Truncate(ts=30): (Page3), (Page1,Page2) 156 func (aot *AOT[B, R]) Truncate(stopFn func(_ B) bool) (cnt int) { 157 aot.Lock() 158 cpy := aot.blocks.Copy() 159 aot.Unlock() 160 161 valid := false // if there is a block stopping search early 162 candidates := make([]B, 0) 163 cpy.Scan(func(block B) bool { 164 if stopFn(block) { 165 // this block's bornTS >= given ts 166 valid = true 167 return false 168 } 169 candidates = append(candidates, block) 170 // logutil.Infof("candidate %s", block.String()) 171 return true 172 }) 173 174 // logutil.Infof("valid=%v, candidates len=%d", valid, len(candidates)) 175 176 // 1. clear them all? probably not a good idea, wrong checkpoint? 177 // 2. just delete one block? seems not neccessary 178 if !valid || len(candidates) <= 1 { 179 return 180 } 181 candidates = candidates[:len(candidates)-1] 182 183 aot.Lock() 184 defer aot.Unlock() 185 186 cnt = len(candidates) 187 for _, block := range candidates { 188 aot.blocks.Delete(block) 189 } 190 191 return 192 } 193 194 func (aot *AOT[B, R]) prepareAppend(rows int) (cnt int, all bool) { 195 if !aot.appender.IsAppendable() { 196 return 197 } 198 left := aot.blockSize - aot.appender.Length() 199 if rows > left { 200 cnt = left 201 } else { 202 cnt = rows 203 all = true 204 } 205 return 206 } 207 208 // One appender 209 func (aot *AOT[B, R]) Append(rows R) (err error) { 210 var ( 211 done bool 212 appended int 213 toAppend int 214 ) 215 for !done { 216 toAppend, done = aot.prepareAppend(rows.Length() - appended) 217 if toAppend == 0 { 218 newB := aot.blockFactory(rows) 219 if err = aot.appendBlock(newB); err != nil { 220 return 221 } 222 continue 223 } 224 if toAppend == rows.Length() { 225 if err = aot.appender.Append(rows); err != nil { 226 return 227 } 228 } else { 229 if err = aot.appender.Append(rows.Window(appended, toAppend)); err != nil { 230 return 231 } 232 } 233 // logutil.Infof("Appended=%d, ToAppend=%d, done=%v, AllRows=%d", appended, toAppend, done, rows.Length()) 234 appended += toAppend 235 } 236 return 237 } 238 239 func (aot *AOT[B, R]) appendBlock(block B) (err error) { 240 aot.Lock() 241 defer aot.Unlock() 242 if aot.appender.IsAppendable() && aot.appender.Length() < aot.blockSize { 243 panic(moerr.NewInternalError( 244 context.Background(), 245 "append a block but the previous block is appendable")) 246 } 247 aot.blocks.Set(block) 248 aot.appender = block 249 return 250 } 251 252 type TimedSliceBlock[R any] struct { 253 BornTS types.TS 254 Rows []R 255 } 256 257 func NewTimedSliceBlock[R any](ts types.TS) *TimedSliceBlock[R] { 258 return &TimedSliceBlock[R]{ 259 BornTS: ts, 260 Rows: make([]R, 0), 261 } 262 } 263 264 func (blk *TimedSliceBlock[R]) Append(rows R) (err error) { 265 blk.Rows = append(blk.Rows, rows) 266 return 267 } 268 269 func (blk *TimedSliceBlock[R]) IsAppendable() bool { 270 return blk != nil 271 } 272 273 func (blk *TimedSliceBlock[R]) Length() int { 274 return len(blk.Rows) 275 } 276 277 func (blk *TimedSliceBlock[R]) String() string { 278 return "TODO" 279 } 280 281 func (blk *TimedSliceBlock[R]) Close() { 282 blk.BornTS = types.TS{} 283 blk.Rows = make([]R, 0) 284 } 285 286 type BatchBlock struct { 287 *containers.Batch 288 ID uint64 289 } 290 291 func NewBatchBlock( 292 id uint64, 293 attrs []string, 294 colTypes []types.Type, 295 nullables []bool, 296 opts containers.Options) *BatchBlock { 297 bat := containers.BuildBatch(attrs, colTypes, nullables, opts) 298 block := &BatchBlock{ 299 Batch: bat, 300 ID: id, 301 } 302 return block 303 } 304 305 func (blk *BatchBlock) IsAppendable() bool { 306 return blk != nil 307 }