github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/blockio/reader.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package blockio 16 17 import ( 18 "context" 19 "github.com/matrixorigin/matrixone/pkg/common/mpool" 20 "github.com/matrixorigin/matrixone/pkg/container/batch" 21 "github.com/matrixorigin/matrixone/pkg/container/types" 22 "github.com/matrixorigin/matrixone/pkg/container/vector" 23 "github.com/matrixorigin/matrixone/pkg/fileservice" 24 "github.com/matrixorigin/matrixone/pkg/objectio" 25 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 26 ) 27 28 const ( 29 AsyncIo = 1 30 SyncIo = 2 31 ) 32 33 var IoModel = SyncIo 34 35 type BlockReader struct { 36 reader *objectio.ObjectReader 37 aio *IoPipeline 38 } 39 40 type fetchParams struct { 41 idxes []uint16 42 typs []types.Type 43 blk uint16 44 pool *mpool.MPool 45 reader *objectio.ObjectReader 46 } 47 48 func NewObjectReader( 49 service fileservice.FileService, 50 key objectio.Location, 51 opts ...objectio.ReaderOptionFunc, 52 ) (*BlockReader, error) { 53 name := key.Name() 54 metaExt := key.Extent() 55 var reader *objectio.ObjectReader 56 var err error 57 if opts == nil { 58 reader, err = objectio.NewObjectReader( 59 &name, 60 &metaExt, 61 service, 62 objectio.WithMetaCachePolicyOption(fileservice.SkipMemoryCache|fileservice.SkipFullFilePreloads)) 63 } else { 64 reader, err = objectio.NewObjectReader(&name, &metaExt, service, opts...) 65 } 66 if err != nil { 67 return nil, err 68 } 69 return &BlockReader{ 70 reader: reader, 71 aio: pipeline, 72 }, nil 73 } 74 75 func NewFileReader(service fileservice.FileService, name string) (*BlockReader, error) { 76 reader, err := objectio.NewObjectReaderWithStr( 77 name, 78 service, 79 objectio.WithMetaCachePolicyOption(fileservice.SkipMemoryCache|fileservice.SkipFullFilePreloads)) 80 if err != nil { 81 return nil, err 82 } 83 return &BlockReader{ 84 reader: reader, 85 aio: pipeline, 86 }, nil 87 } 88 89 func NewFileReaderNoCache(service fileservice.FileService, name string) (*BlockReader, error) { 90 reader, err := objectio.NewObjectReaderWithStr( 91 name, 92 service, 93 objectio.WithDataCachePolicyOption(fileservice.SkipAllCache), 94 objectio.WithMetaCachePolicyOption(fileservice.SkipAllCache)) 95 if err != nil { 96 return nil, err 97 } 98 return &BlockReader{ 99 reader: reader, 100 }, nil 101 } 102 103 // LoadColumns needs typs to generate columns, if the target table has no schema change, nil can be passed. 104 func (r *BlockReader) LoadColumns( 105 ctx context.Context, 106 cols []uint16, 107 typs []types.Type, 108 blk uint16, 109 m *mpool.MPool, 110 ) (bat *batch.Batch, release func(), err error) { 111 metaExt := r.reader.GetMetaExtent() 112 if metaExt == nil || metaExt.End() == 0 { 113 return 114 } 115 var ioVectors *fileservice.IOVector 116 if IoModel == AsyncIo { 117 proc := fetchParams{ 118 idxes: cols, 119 blk: blk, 120 typs: typs, 121 pool: m, 122 reader: r.reader, 123 } 124 var v any 125 if v, err = r.aio.Fetch(ctx, proc); err != nil { 126 return 127 } 128 ioVectors = v.(*fileservice.IOVector) 129 } else { 130 ioVectors, err = r.reader.ReadOneBlock(ctx, cols, typs, blk, m) 131 if err != nil { 132 return 133 } 134 } 135 release = func() { 136 if ioVectors != nil { 137 objectio.ReleaseIOVector(ioVectors) 138 } 139 } 140 defer func() { 141 if err != nil { 142 release() 143 } 144 }() 145 bat = batch.NewWithSize(len(cols)) 146 var obj any 147 for i := range cols { 148 obj, err = objectio.Decode(ioVectors.Entries[i].CachedData.Bytes()) 149 if err != nil { 150 return 151 } 152 bat.Vecs[i] = obj.(*vector.Vector) 153 bat.SetRowCount(bat.Vecs[i].Length()) 154 } 155 return 156 } 157 158 // LoadColumns needs typs to generate columns, if the target table has no schema change, nil can be passed. 159 func (r *BlockReader) LoadSubColumns( 160 ctx context.Context, 161 cols []uint16, 162 typs []types.Type, 163 blk uint16, 164 m *mpool.MPool, 165 ) (bats []*batch.Batch, releases func(), err error) { 166 metaExt := r.reader.GetMetaExtent() 167 if metaExt == nil || metaExt.End() == 0 { 168 return 169 } 170 var ioVectors []*fileservice.IOVector 171 ioVectors, err = r.reader.ReadSubBlock(ctx, cols, typs, blk, m) 172 if err != nil { 173 return 174 } 175 releases = func() { 176 for _, vec := range ioVectors { 177 objectio.ReleaseIOVector(vec) 178 } 179 } 180 bats = make([]*batch.Batch, 0) 181 for idx := range ioVectors { 182 bat := batch.NewWithSize(len(cols)) 183 var obj any 184 for i := range cols { 185 obj, err = objectio.Decode(ioVectors[idx].Entries[i].CachedData.Bytes()) 186 if err != nil { 187 return 188 } 189 bat.Vecs[i] = obj.(*vector.Vector) 190 bat.SetRowCount(bat.Vecs[i].Length()) 191 } 192 bats = append(bats, bat) 193 } 194 return 195 } 196 197 // LoadColumns needs typs to generate columns, if the target table has no schema change, nil can be passed. 198 func (r *BlockReader) LoadOneSubColumns( 199 ctx context.Context, 200 cols []uint16, 201 typs []types.Type, 202 dataType uint16, 203 blk uint16, 204 m *mpool.MPool, 205 ) (bat *batch.Batch, release func(), err error) { 206 metaExt := r.reader.GetMetaExtent() 207 if metaExt == nil || metaExt.End() == 0 { 208 return 209 } 210 ioVector, err := r.reader.ReadOneSubBlock(ctx, cols, typs, dataType, blk, m) 211 release = func() { 212 objectio.ReleaseIOVector(ioVector) 213 } 214 if err != nil { 215 return 216 } 217 bat = batch.NewWithSize(len(cols)) 218 var obj any 219 for i := range cols { 220 obj, err = objectio.Decode(ioVector.Entries[i].CachedData.Bytes()) 221 if err != nil { 222 return 223 } 224 bat.Vecs[i] = obj.(*vector.Vector) 225 bat.SetRowCount(bat.Vecs[i].Length()) 226 } 227 return 228 } 229 230 func (r *BlockReader) LoadAllColumns( 231 ctx context.Context, 232 idxs []uint16, 233 m *mpool.MPool, 234 ) ([]*batch.Batch, func(), error) { 235 meta, err := r.reader.ReadAllMeta(ctx, m) 236 if err != nil { 237 return nil, nil, err 238 } 239 dataMeta := meta.MustDataMeta() 240 if dataMeta.BlockHeader().MetaLocation().End() == 0 { 241 return nil, nil, nil 242 } 243 block := dataMeta.GetBlockMeta(0) 244 if len(idxs) == 0 { 245 idxs = make([]uint16, block.GetColumnCount()) 246 for i := range idxs { 247 idxs[i] = uint16(i) 248 } 249 } 250 251 bats := make([]*batch.Batch, 0) 252 253 ioVectors, err := r.reader.ReadAll(ctx, idxs, nil) 254 if err != nil { 255 return nil, nil, err 256 } 257 defer func() { 258 if err != nil { 259 if ioVectors != nil { 260 objectio.ReleaseIOVector(ioVectors) 261 } 262 } 263 }() 264 for y := 0; y < int(dataMeta.BlockCount()); y++ { 265 bat := batch.NewWithSize(len(idxs)) 266 var obj any 267 for i := range idxs { 268 obj, err = objectio.Decode(ioVectors.Entries[y*len(idxs)+i].CachedData.Bytes()) 269 if err != nil { 270 return nil, nil, err 271 } 272 bat.Vecs[i] = obj.(*vector.Vector) 273 bat.SetRowCount(bat.Vecs[i].Length()) 274 } 275 bats = append(bats, bat) 276 } 277 return bats, func() { objectio.ReleaseIOVector(ioVectors) }, nil 278 } 279 280 func (r *BlockReader) LoadZoneMaps( 281 ctx context.Context, 282 seqnums []uint16, 283 id uint16, 284 m *mpool.MPool, 285 ) ([]objectio.ZoneMap, error) { 286 return r.reader.ReadZM(ctx, id, seqnums, m) 287 } 288 289 func (r *BlockReader) LoadObjectMeta(ctx context.Context, m *mpool.MPool) (objectio.ObjectDataMeta, error) { 290 meta, err := r.reader.ReadMeta(ctx, m) 291 if err != nil { 292 return nil, err 293 } 294 return meta.MustDataMeta(), nil 295 } 296 297 func (r *BlockReader) LoadAllBlocks(ctx context.Context, m *mpool.MPool) ([]objectio.BlockObject, error) { 298 meta, err := r.reader.ReadAllMeta(ctx, m) 299 if err != nil { 300 return nil, err 301 } 302 dataMeta := meta.MustDataMeta() 303 blocks := make([]objectio.BlockObject, dataMeta.BlockCount()) 304 for i := 0; i < int(dataMeta.BlockCount()); i++ { 305 blocks[i] = dataMeta.GetBlockMeta(uint32(i)) 306 } 307 return blocks, nil 308 } 309 310 func (r *BlockReader) LoadZoneMap( 311 ctx context.Context, 312 seqnums []uint16, 313 block objectio.BlockObject, 314 m *mpool.MPool) ([]objectio.ZoneMap, error) { 315 return block.ToColumnZoneMaps(seqnums), nil 316 } 317 318 func (r *BlockReader) LoadOneBF( 319 ctx context.Context, 320 blk uint16, 321 ) (objectio.StaticFilter, uint32, error) { 322 return r.reader.ReadOneBF(ctx, blk) 323 } 324 325 func (r *BlockReader) LoadAllBF( 326 ctx context.Context, 327 ) (objectio.BloomFilter, uint32, error) { 328 return r.reader.ReadAllBF(ctx) 329 } 330 331 func (r *BlockReader) GetObjectName() *objectio.ObjectName { 332 return r.reader.GetObjectName() 333 } 334 335 func (r *BlockReader) GetName() string { 336 return r.reader.GetName() 337 } 338 339 func (r *BlockReader) GetObjectReader() *objectio.ObjectReader { 340 return r.reader 341 } 342 343 // The caller has merged the block information that needs to be prefetched 344 func PrefetchWithMerged(params PrefetchParams) error { 345 return pipeline.Prefetch(params) 346 } 347 348 func Prefetch(idxes []uint16, ids []uint16, service fileservice.FileService, key objectio.Location) error { 349 params, err := BuildPrefetchParams(service, key) 350 if err != nil { 351 return err 352 } 353 params.AddBlock(idxes, ids) 354 return pipeline.Prefetch(params) 355 } 356 357 func PrefetchTombstone(idxes []uint16, ids []uint16, service fileservice.FileService, key objectio.Location) error { 358 params, err := BuildPrefetchParams(service, key) 359 if err != nil { 360 return err 361 } 362 params.AddBlockWithType(idxes, ids, uint16(objectio.SchemaTombstone)) 363 return pipeline.Prefetch(params) 364 } 365 366 func PrefetchMeta(service fileservice.FileService, key objectio.Location) error { 367 params, err := BuildPrefetchParams(service, key) 368 if err != nil { 369 return err 370 } 371 return pipeline.Prefetch(params) 372 } 373 374 func PrefetchFile(service fileservice.FileService, name string) error { 375 reader, err := NewFileReader(service, name) 376 if err != nil { 377 return err 378 } 379 bs, err := reader.LoadAllBlocks(context.Background(), common.DefaultAllocator) 380 if err != nil { 381 return err 382 } 383 params := buildPrefetchParamsByReader(reader) 384 for i := range bs { 385 idxes := make([]uint16, bs[i].GetColumnCount()) 386 for a := uint16(0); a < bs[i].GetColumnCount(); a++ { 387 idxes[a] = a 388 } 389 params.AddBlock(idxes, []uint16{bs[i].GetID()}) 390 } 391 return PrefetchWithMerged(params) 392 }