github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/blockio/reader.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package blockio
    16  
    17  import (
    18  	"context"
    19  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    20  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    21  	"github.com/matrixorigin/matrixone/pkg/container/types"
    22  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    23  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    24  	"github.com/matrixorigin/matrixone/pkg/objectio"
    25  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    26  )
    27  
// IO scheduling modes for block reads: AsyncIo routes reads through the
// shared IoPipeline, SyncIo issues them directly on the caller's goroutine.
const (
	AsyncIo = 1
	SyncIo  = 2
)

// IoModel selects which mode LoadColumns uses; it defaults to SyncIo.
var IoModel = SyncIo
    34  
// BlockReader reads block data of one object through an underlying
// objectio.ObjectReader, optionally scheduling reads on the shared
// async IO pipeline.
type BlockReader struct {
	reader *objectio.ObjectReader
	// aio is only consulted when IoModel == AsyncIo; note it is left nil
	// by NewFileReaderNoCache.
	aio *IoPipeline
}
    39  
// fetchParams bundles the arguments of one async block fetch submitted
// to the IoPipeline (see LoadColumns' AsyncIo branch).
type fetchParams struct {
	idxes  []uint16               // column indexes to read
	typs   []types.Type           // column types; may be nil when the schema never changed
	blk    uint16                 // block id within the object
	pool   *mpool.MPool           // memory pool used for the read
	reader *objectio.ObjectReader // reader that performs the actual IO
}
    47  
    48  func NewObjectReader(
    49  	service fileservice.FileService,
    50  	key objectio.Location,
    51  	opts ...objectio.ReaderOptionFunc,
    52  ) (*BlockReader, error) {
    53  	name := key.Name()
    54  	metaExt := key.Extent()
    55  	var reader *objectio.ObjectReader
    56  	var err error
    57  	if opts == nil {
    58  		reader, err = objectio.NewObjectReader(
    59  			&name,
    60  			&metaExt,
    61  			service,
    62  			objectio.WithMetaCachePolicyOption(fileservice.SkipMemoryCache|fileservice.SkipFullFilePreloads))
    63  	} else {
    64  		reader, err = objectio.NewObjectReader(&name, &metaExt, service, opts...)
    65  	}
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  	return &BlockReader{
    70  		reader: reader,
    71  		aio:    pipeline,
    72  	}, nil
    73  }
    74  
    75  func NewFileReader(service fileservice.FileService, name string) (*BlockReader, error) {
    76  	reader, err := objectio.NewObjectReaderWithStr(
    77  		name,
    78  		service,
    79  		objectio.WithMetaCachePolicyOption(fileservice.SkipMemoryCache|fileservice.SkipFullFilePreloads))
    80  	if err != nil {
    81  		return nil, err
    82  	}
    83  	return &BlockReader{
    84  		reader: reader,
    85  		aio:    pipeline,
    86  	}, nil
    87  }
    88  
    89  func NewFileReaderNoCache(service fileservice.FileService, name string) (*BlockReader, error) {
    90  	reader, err := objectio.NewObjectReaderWithStr(
    91  		name,
    92  		service,
    93  		objectio.WithDataCachePolicyOption(fileservice.SkipAllCache),
    94  		objectio.WithMetaCachePolicyOption(fileservice.SkipAllCache))
    95  	if err != nil {
    96  		return nil, err
    97  	}
    98  	return &BlockReader{
    99  		reader: reader,
   100  	}, nil
   101  }
   102  
// LoadColumns reads the given columns of block blk and decodes them into
// a batch. typs is needed to generate columns; if the target table has no
// schema change, nil can be passed. The returned release func frees the
// underlying IO vector and must be called by the caller when bat is no
// longer needed; on error the vector is released here before returning.
func (r *BlockReader) LoadColumns(
	ctx context.Context,
	cols []uint16,
	typs []types.Type,
	blk uint16,
	m *mpool.MPool,
) (bat *batch.Batch, release func(), err error) {
	metaExt := r.reader.GetMetaExtent()
	// Nothing to read: return zero values (bat and release are nil).
	if metaExt == nil || metaExt.End() == 0 {
		return
	}
	var ioVectors *fileservice.IOVector
	if IoModel == AsyncIo {
		// Hand the read off to the shared pipeline and wait for the result.
		proc := fetchParams{
			idxes:  cols,
			blk:    blk,
			typs:   typs,
			pool:   m,
			reader: r.reader,
		}
		var v any
		if v, err = r.aio.Fetch(ctx, proc); err != nil {
			return
		}
		ioVectors = v.(*fileservice.IOVector)
	} else {
		// Synchronous path: read the block directly.
		ioVectors, err = r.reader.ReadOneBlock(ctx, cols, typs, blk, m)
		if err != nil {
			return
		}
	}
	release = func() {
		if ioVectors != nil {
			objectio.ReleaseIOVector(ioVectors)
		}
	}
	// If decoding below fails, free the IO vector before returning the error.
	defer func() {
		if err != nil {
			release()
		}
	}()
	bat = batch.NewWithSize(len(cols))
	var obj any
	for i := range cols {
		// Entries are ordered to match cols, one entry per requested column.
		obj, err = objectio.Decode(ioVectors.Entries[i].CachedData.Bytes())
		if err != nil {
			return
		}
		bat.Vecs[i] = obj.(*vector.Vector)
		// All vectors in a block have the same length; the last write wins.
		bat.SetRowCount(bat.Vecs[i].Length())
	}
	return
}
   157  
// LoadSubColumns reads the given columns of every sub-block of block blk
// and decodes each sub-block into its own batch. typs is needed to
// generate columns; if the target table has no schema change, nil can be
// passed. The returned releases func frees all underlying IO vectors.
// NOTE(review): unlike LoadColumns, a decode error here returns without
// releasing the vectors — the caller must invoke releases even on error.
func (r *BlockReader) LoadSubColumns(
	ctx context.Context,
	cols []uint16,
	typs []types.Type,
	blk uint16,
	m *mpool.MPool,
) (bats []*batch.Batch, releases func(), err error) {
	metaExt := r.reader.GetMetaExtent()
	// Nothing to read: return zero values.
	if metaExt == nil || metaExt.End() == 0 {
		return
	}
	var ioVectors []*fileservice.IOVector
	ioVectors, err = r.reader.ReadSubBlock(ctx, cols, typs, blk, m)
	if err != nil {
		return
	}
	releases = func() {
		for _, vec := range ioVectors {
			objectio.ReleaseIOVector(vec)
		}
	}
	// One batch per sub-block, in the order ReadSubBlock returned them.
	bats = make([]*batch.Batch, 0)
	for idx := range ioVectors {
		bat := batch.NewWithSize(len(cols))
		var obj any
		for i := range cols {
			obj, err = objectio.Decode(ioVectors[idx].Entries[i].CachedData.Bytes())
			if err != nil {
				return
			}
			bat.Vecs[i] = obj.(*vector.Vector)
			bat.SetRowCount(bat.Vecs[i].Length())
		}
		bats = append(bats, bat)
	}
	return
}
   196  
   197  // LoadColumns needs typs to generate columns, if the target table has no schema change, nil can be passed.
   198  func (r *BlockReader) LoadOneSubColumns(
   199  	ctx context.Context,
   200  	cols []uint16,
   201  	typs []types.Type,
   202  	dataType uint16,
   203  	blk uint16,
   204  	m *mpool.MPool,
   205  ) (bat *batch.Batch, release func(), err error) {
   206  	metaExt := r.reader.GetMetaExtent()
   207  	if metaExt == nil || metaExt.End() == 0 {
   208  		return
   209  	}
   210  	ioVector, err := r.reader.ReadOneSubBlock(ctx, cols, typs, dataType, blk, m)
   211  	release = func() {
   212  		objectio.ReleaseIOVector(ioVector)
   213  	}
   214  	if err != nil {
   215  		return
   216  	}
   217  	bat = batch.NewWithSize(len(cols))
   218  	var obj any
   219  	for i := range cols {
   220  		obj, err = objectio.Decode(ioVector.Entries[i].CachedData.Bytes())
   221  		if err != nil {
   222  			return
   223  		}
   224  		bat.Vecs[i] = obj.(*vector.Vector)
   225  		bat.SetRowCount(bat.Vecs[i].Length())
   226  	}
   227  	return
   228  }
   229  
// LoadAllColumns reads the columns idxs of every block in the object and
// decodes one batch per block. If idxs is empty, all columns of the first
// block's schema are read. The returned func releases the IO vector and
// must be called once the batches are no longer needed; on error the
// vector is released here before returning.
func (r *BlockReader) LoadAllColumns(
	ctx context.Context,
	idxs []uint16,
	m *mpool.MPool,
) ([]*batch.Batch, func(), error) {
	meta, err := r.reader.ReadAllMeta(ctx, m)
	if err != nil {
		return nil, nil, err
	}
	dataMeta := meta.MustDataMeta()
	// Empty meta location: nothing to load.
	if dataMeta.BlockHeader().MetaLocation().End() == 0 {
		return nil, nil, nil
	}
	// Default to all columns, using block 0's column count as the schema.
	block := dataMeta.GetBlockMeta(0)
	if len(idxs) == 0 {
		idxs = make([]uint16, block.GetColumnCount())
		for i := range idxs {
			idxs[i] = uint16(i)
		}
	}

	bats := make([]*batch.Batch, 0)

	// NOTE(review): ReadAll is called with a nil pool rather than m —
	// looks intentional (cache-backed read?) but worth confirming.
	ioVectors, err := r.reader.ReadAll(ctx, idxs, nil)
	if err != nil {
		return nil, nil, err
	}
	// Free the IO vector if decoding below fails.
	defer func() {
		if err != nil {
			if ioVectors != nil {
				objectio.ReleaseIOVector(ioVectors)
			}
		}
	}()
	for y := 0; y < int(dataMeta.BlockCount()); y++ {
		bat := batch.NewWithSize(len(idxs))
		var obj any
		for i := range idxs {
			// Entries are laid out block-major: block y's columns occupy
			// indexes [y*len(idxs), (y+1)*len(idxs)).
			obj, err = objectio.Decode(ioVectors.Entries[y*len(idxs)+i].CachedData.Bytes())
			if err != nil {
				return nil, nil, err
			}
			bat.Vecs[i] = obj.(*vector.Vector)
			bat.SetRowCount(bat.Vecs[i].Length())
		}
		bats = append(bats, bat)
	}
	return bats, func() { objectio.ReleaseIOVector(ioVectors) }, nil
}
   279  
   280  func (r *BlockReader) LoadZoneMaps(
   281  	ctx context.Context,
   282  	seqnums []uint16,
   283  	id uint16,
   284  	m *mpool.MPool,
   285  ) ([]objectio.ZoneMap, error) {
   286  	return r.reader.ReadZM(ctx, id, seqnums, m)
   287  }
   288  
   289  func (r *BlockReader) LoadObjectMeta(ctx context.Context, m *mpool.MPool) (objectio.ObjectDataMeta, error) {
   290  	meta, err := r.reader.ReadMeta(ctx, m)
   291  	if err != nil {
   292  		return nil, err
   293  	}
   294  	return meta.MustDataMeta(), nil
   295  }
   296  
   297  func (r *BlockReader) LoadAllBlocks(ctx context.Context, m *mpool.MPool) ([]objectio.BlockObject, error) {
   298  	meta, err := r.reader.ReadAllMeta(ctx, m)
   299  	if err != nil {
   300  		return nil, err
   301  	}
   302  	dataMeta := meta.MustDataMeta()
   303  	blocks := make([]objectio.BlockObject, dataMeta.BlockCount())
   304  	for i := 0; i < int(dataMeta.BlockCount()); i++ {
   305  		blocks[i] = dataMeta.GetBlockMeta(uint32(i))
   306  	}
   307  	return blocks, nil
   308  }
   309  
// LoadZoneMap returns the column zone maps of the given block for the
// requested sequence numbers. ctx and m are unused here — presumably kept
// so the signature parallels the other Load* methods; confirm before
// removing.
func (r *BlockReader) LoadZoneMap(
	ctx context.Context,
	seqnums []uint16,
	block objectio.BlockObject,
	m *mpool.MPool) ([]objectio.ZoneMap, error) {
	return block.ToColumnZoneMaps(seqnums), nil
}
   317  
   318  func (r *BlockReader) LoadOneBF(
   319  	ctx context.Context,
   320  	blk uint16,
   321  ) (objectio.StaticFilter, uint32, error) {
   322  	return r.reader.ReadOneBF(ctx, blk)
   323  }
   324  
   325  func (r *BlockReader) LoadAllBF(
   326  	ctx context.Context,
   327  ) (objectio.BloomFilter, uint32, error) {
   328  	return r.reader.ReadAllBF(ctx)
   329  }
   330  
   331  func (r *BlockReader) GetObjectName() *objectio.ObjectName {
   332  	return r.reader.GetObjectName()
   333  }
   334  
   335  func (r *BlockReader) GetName() string {
   336  	return r.reader.GetName()
   337  }
   338  
   339  func (r *BlockReader) GetObjectReader() *objectio.ObjectReader {
   340  	return r.reader
   341  }
   342  
   343  // The caller has merged the block information that needs to be prefetched
   344  func PrefetchWithMerged(params PrefetchParams) error {
   345  	return pipeline.Prefetch(params)
   346  }
   347  
   348  func Prefetch(idxes []uint16, ids []uint16, service fileservice.FileService, key objectio.Location) error {
   349  	params, err := BuildPrefetchParams(service, key)
   350  	if err != nil {
   351  		return err
   352  	}
   353  	params.AddBlock(idxes, ids)
   354  	return pipeline.Prefetch(params)
   355  }
   356  
   357  func PrefetchTombstone(idxes []uint16, ids []uint16, service fileservice.FileService, key objectio.Location) error {
   358  	params, err := BuildPrefetchParams(service, key)
   359  	if err != nil {
   360  		return err
   361  	}
   362  	params.AddBlockWithType(idxes, ids, uint16(objectio.SchemaTombstone))
   363  	return pipeline.Prefetch(params)
   364  }
   365  
   366  func PrefetchMeta(service fileservice.FileService, key objectio.Location) error {
   367  	params, err := BuildPrefetchParams(service, key)
   368  	if err != nil {
   369  		return err
   370  	}
   371  	return pipeline.Prefetch(params)
   372  }
   373  
   374  func PrefetchFile(service fileservice.FileService, name string) error {
   375  	reader, err := NewFileReader(service, name)
   376  	if err != nil {
   377  		return err
   378  	}
   379  	bs, err := reader.LoadAllBlocks(context.Background(), common.DefaultAllocator)
   380  	if err != nil {
   381  		return err
   382  	}
   383  	params := buildPrefetchParamsByReader(reader)
   384  	for i := range bs {
   385  		idxes := make([]uint16, bs[i].GetColumnCount())
   386  		for a := uint16(0); a < bs[i].GetColumnCount(); a++ {
   387  			idxes[a] = a
   388  		}
   389  		params.AddBlock(idxes, []uint16{bs[i].GetID()})
   390  	}
   391  	return PrefetchWithMerged(params)
   392  }